In [1]:
# !pip install git+https://github.com/alberanid/imdbpy
# !pip install pandas
# !pip install numpy
# !pip install matplotlib
# !pip install seaborn
# !pip install pandas_profiling --upgrade
# !pip install plotly
# !pip install wordcloud
# !pip install Flask
In [2]:
# Import Dataset
# Import File from Loacal Drive
# from google.colab import files
# data_to_load = files.upload()
# from google.colab import drive
# drive.mount('/content/drive')
In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import collections
import plotly.express as px
import plotly.graph_objects as go
import nltk
import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.probability import FreqDist
from nltk.util import ngrams
from plotly.subplots import make_subplots
from plotly.offline import iplot, init_notebook_mode
from wordcloud import WordCloud, STOPWORDS
from pandas_profiling import ProfileReport
%matplotlib inline
warnings.filterwarnings("ignore")
In [4]:
nltk.download('all')
[nltk_data] Downloading collection 'all'
[nltk_data]    | 
[nltk_data]    | Downloading package abc to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package abc is already up-to-date!
[nltk_data]    | Downloading package alpino to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package alpino is already up-to-date!
[nltk_data]    | Downloading package biocreative_ppi to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package biocreative_ppi is already up-to-date!
[nltk_data]    | Downloading package brown to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package brown is already up-to-date!
[nltk_data]    | Downloading package brown_tei to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package brown_tei is already up-to-date!
[nltk_data]    | Downloading package cess_cat to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package cess_cat is already up-to-date!
[nltk_data]    | Downloading package cess_esp to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package cess_esp is already up-to-date!
[nltk_data]    | Downloading package chat80 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package chat80 is already up-to-date!
[nltk_data]    | Downloading package city_database to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package city_database is already up-to-date!
[nltk_data]    | Downloading package cmudict to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package cmudict is already up-to-date!
[nltk_data]    | Downloading package comparative_sentences to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package comparative_sentences is already up-to-
[nltk_data]    |       date!
[nltk_data]    | Downloading package comtrans to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package comtrans is already up-to-date!
[nltk_data]    | Downloading package conll2000 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package conll2000 is already up-to-date!
[nltk_data]    | Downloading package conll2002 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package conll2002 is already up-to-date!
[nltk_data]    | Downloading package conll2007 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package conll2007 is already up-to-date!
[nltk_data]    | Downloading package crubadan to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package crubadan is already up-to-date!
[nltk_data]    | Downloading package dependency_treebank to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package dependency_treebank is already up-to-date!
[nltk_data]    | Downloading package dolch to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package dolch is already up-to-date!
[nltk_data]    | Downloading package europarl_raw to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package europarl_raw is already up-to-date!
[nltk_data]    | Downloading package floresta to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package floresta is already up-to-date!
[nltk_data]    | Downloading package framenet_v15 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package framenet_v15 is already up-to-date!
[nltk_data]    | Downloading package framenet_v17 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package framenet_v17 is already up-to-date!
[nltk_data]    | Downloading package gazetteers to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package gazetteers is already up-to-date!
[nltk_data]    | Downloading package genesis to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package genesis is already up-to-date!
[nltk_data]    | Downloading package gutenberg to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package gutenberg is already up-to-date!
[nltk_data]    | Downloading package ieer to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package ieer is already up-to-date!
[nltk_data]    | Downloading package inaugural to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package inaugural is already up-to-date!
[nltk_data]    | Downloading package indian to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package indian is already up-to-date!
[nltk_data]    | Downloading package jeita to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package jeita is already up-to-date!
[nltk_data]    | Downloading package kimmo to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package kimmo is already up-to-date!
[nltk_data]    | Downloading package knbc to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package knbc is already up-to-date!
[nltk_data]    | Downloading package lin_thesaurus to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package lin_thesaurus is already up-to-date!
[nltk_data]    | Downloading package mac_morpho to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package mac_morpho is already up-to-date!
[nltk_data]    | Downloading package machado to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package machado is already up-to-date!
[nltk_data]    | Downloading package masc_tagged to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package masc_tagged is already up-to-date!
[nltk_data]    | Downloading package moses_sample to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package moses_sample is already up-to-date!
[nltk_data]    | Downloading package movie_reviews to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package movie_reviews is already up-to-date!
[nltk_data]    | Downloading package names to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package names is already up-to-date!
[nltk_data]    | Downloading package nombank.1.0 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package nombank.1.0 is already up-to-date!
[nltk_data]    | Downloading package nps_chat to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package nps_chat is already up-to-date!
[nltk_data]    | Downloading package omw to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package omw is already up-to-date!
[nltk_data]    | Downloading package opinion_lexicon to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package opinion_lexicon is already up-to-date!
[nltk_data]    | Downloading package paradigms to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package paradigms is already up-to-date!
[nltk_data]    | Downloading package pil to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package pil is already up-to-date!
[nltk_data]    | Downloading package pl196x to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package pl196x is already up-to-date!
[nltk_data]    | Downloading package ppattach to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package ppattach is already up-to-date!
[nltk_data]    | Downloading package problem_reports to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package problem_reports is already up-to-date!
[nltk_data]    | Downloading package propbank to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package propbank is already up-to-date!
[nltk_data]    | Downloading package ptb to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package ptb is already up-to-date!
[nltk_data]    | Downloading package product_reviews_1 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package product_reviews_1 is already up-to-date!
[nltk_data]    | Downloading package product_reviews_2 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package product_reviews_2 is already up-to-date!
[nltk_data]    | Downloading package pros_cons to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package pros_cons is already up-to-date!
[nltk_data]    | Downloading package qc to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package qc is already up-to-date!
[nltk_data]    | Downloading package reuters to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package reuters is already up-to-date!
[nltk_data]    | Downloading package rte to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package rte is already up-to-date!
[nltk_data]    | Downloading package semcor to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package semcor is already up-to-date!
[nltk_data]    | Downloading package senseval to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package senseval is already up-to-date!
[nltk_data]    | Downloading package sentiwordnet to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package sentiwordnet is already up-to-date!
[nltk_data]    | Downloading package sentence_polarity to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package sentence_polarity is already up-to-date!
[nltk_data]    | Downloading package shakespeare to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package shakespeare is already up-to-date!
[nltk_data]    | Downloading package sinica_treebank to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package sinica_treebank is already up-to-date!
[nltk_data]    | Downloading package smultron to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package smultron is already up-to-date!
[nltk_data]    | Downloading package state_union to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package state_union is already up-to-date!
[nltk_data]    | Downloading package stopwords to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package stopwords is already up-to-date!
[nltk_data]    | Downloading package subjectivity to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package subjectivity is already up-to-date!
[nltk_data]    | Downloading package swadesh to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package swadesh is already up-to-date!
[nltk_data]    | Downloading package switchboard to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package switchboard is already up-to-date!
[nltk_data]    | Downloading package timit to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package timit is already up-to-date!
[nltk_data]    | Downloading package toolbox to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package toolbox is already up-to-date!
[nltk_data]    | Downloading package treebank to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package treebank is already up-to-date!
[nltk_data]    | Downloading package twitter_samples to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package twitter_samples is already up-to-date!
[nltk_data]    | Downloading package udhr to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package udhr is already up-to-date!
[nltk_data]    | Downloading package udhr2 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package udhr2 is already up-to-date!
[nltk_data]    | Downloading package unicode_samples to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package unicode_samples is already up-to-date!
[nltk_data]    | Downloading package universal_treebanks_v20 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package universal_treebanks_v20 is already up-to-
[nltk_data]    |       date!
[nltk_data]    | Downloading package verbnet to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package verbnet is already up-to-date!
[nltk_data]    | Downloading package verbnet3 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package verbnet3 is already up-to-date!
[nltk_data]    | Downloading package webtext to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package webtext is already up-to-date!
[nltk_data]    | Downloading package wordnet to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package wordnet is already up-to-date!
[nltk_data]    | Downloading package wordnet_ic to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package wordnet_ic is already up-to-date!
[nltk_data]    | Downloading package words to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package words is already up-to-date!
[nltk_data]    | Downloading package ycoe to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package ycoe is already up-to-date!
[nltk_data]    | Downloading package rslp to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package rslp is already up-to-date!
[nltk_data]    | Downloading package maxent_treebank_pos_tagger to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package maxent_treebank_pos_tagger is already up-
[nltk_data]    |       to-date!
[nltk_data]    | Downloading package universal_tagset to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package universal_tagset is already up-to-date!
[nltk_data]    | Downloading package maxent_ne_chunker to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package maxent_ne_chunker is already up-to-date!
[nltk_data]    | Downloading package punkt to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package punkt is already up-to-date!
[nltk_data]    | Downloading package book_grammars to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package book_grammars is already up-to-date!
[nltk_data]    | Downloading package sample_grammars to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package sample_grammars is already up-to-date!
[nltk_data]    | Downloading package spanish_grammars to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package spanish_grammars is already up-to-date!
[nltk_data]    | Downloading package basque_grammars to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package basque_grammars is already up-to-date!
[nltk_data]    | Downloading package large_grammars to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package large_grammars is already up-to-date!
[nltk_data]    | Downloading package tagsets to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package tagsets is already up-to-date!
[nltk_data]    | Downloading package snowball_data to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package snowball_data is already up-to-date!
[nltk_data]    | Downloading package bllip_wsj_no_aux to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package bllip_wsj_no_aux is already up-to-date!
[nltk_data]    | Downloading package word2vec_sample to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package word2vec_sample is already up-to-date!
[nltk_data]    | Downloading package panlex_swadesh to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package panlex_swadesh is already up-to-date!
[nltk_data]    | Downloading package mte_teip5 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package mte_teip5 is already up-to-date!
[nltk_data]    | Downloading package averaged_perceptron_tagger to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package averaged_perceptron_tagger is already up-
[nltk_data]    |       to-date!
[nltk_data]    | Downloading package averaged_perceptron_tagger_ru to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package averaged_perceptron_tagger_ru is already
[nltk_data]    |       up-to-date!
[nltk_data]    | Downloading package perluniprops to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package perluniprops is already up-to-date!
[nltk_data]    | Downloading package nonbreaking_prefixes to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package nonbreaking_prefixes is already up-to-date!
[nltk_data]    | Downloading package vader_lexicon to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package vader_lexicon is already up-to-date!
[nltk_data]    | Downloading package porter_test to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package porter_test is already up-to-date!
[nltk_data]    | Downloading package wmt15_eval to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package wmt15_eval is already up-to-date!
[nltk_data]    | Downloading package mwa_ppdb to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package mwa_ppdb is already up-to-date!
[nltk_data]    | 
[nltk_data]  Done downloading collection all
Out[4]:
True
In [5]:
# path = '/content/drive/MyDrive/Files/'

path = 'C:\\Users\\pawan\\OneDrive\\Desktop\\ott\\Data\\'
 
df_movies = pd.read_csv(path + 'ottmovies.csv')
 
df_movies.head()
Out[5]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country Language Plotline Runtime Kind Seasons Netflix Hulu Prime Video Disney+ Type
0 1 Inception 2010 13+ 8.8 87% Christopher Nolan Leonardo DiCaprio,Joseph Gordon-Levitt,Elliot ... Action,Adventure,Sci-Fi,Thriller United States,United Kingdom English,Japanese,French Dom Cobb is a skilled thief, the absolute best... 148.0 movie NaN 1 0 0 0 0
1 2 The Matrix 1999 16+ 8.7 88% Lana Wachowski,Lilly Wachowski Keanu Reeves,Laurence Fishburne,Carrie-Anne Mo... Action,Sci-Fi United States English Thomas A. Anderson is a man living two lives. ... 136.0 movie NaN 1 0 0 0 0
2 3 Avengers: Infinity War 2018 13+ 8.4 85% Anthony Russo,Joe Russo Robert Downey Jr.,Chris Hemsworth,Mark Ruffalo... Action,Adventure,Sci-Fi United States English As the Avengers and their allies have continue... 149.0 movie NaN 1 0 0 0 0
3 4 Back to the Future 1985 7+ 8.5 96% Robert Zemeckis Michael J. Fox,Christopher Lloyd,Lea Thompson,... Adventure,Comedy,Sci-Fi United States English Marty McFly, a typical American teenager of th... 116.0 movie NaN 1 0 0 0 0
4 5 The Good, the Bad and the Ugly 1966 16+ 8.8 97% Sergio Leone Eli Wallach,Clint Eastwood,Lee Van Cleef,Aldo ... Western Italy,Spain,West Germany,United States Italian Blondie (The Good) (Clint Eastwood) is a profe... 161.0 movie NaN 1 0 1 0 0
In [6]:
# profile = ProfileReport(df_movies)
# profile
In [7]:
def data_investigate(df):
    print('No of Rows : ', df.shape[0])
    print('No of Coloums : ', df.shape[1])
    print('**'*25)
    print('Colums Names : \n', df.columns)
    print('**'*25)
    print('Datatype of Columns : \n', df.dtypes)
    print('**'*25)
    print('Missing Values : ')
    c = df.isnull().sum()
    c = c[c > 0]
    print(c)
    print('**'*25)
    print('Missing vaules %age wise :\n')
    print((100*(df.isnull().sum()/len(df.index))))
    print('**'*25)
    print('Pictorial Representation : ')
    plt.figure(figsize = (10, 10))
    sns.heatmap(df.isnull(), yticklabels = False, cbar = False)
    plt.show()
In [8]:
data_investigate(df_movies)
No of Rows :  16923
No of Coloums :  20
**************************************************
Colums Names : 
 Index(['ID', 'Title', 'Year', 'Age', 'IMDb', 'Rotten Tomatoes', 'Directors',
       'Cast', 'Genres', 'Country', 'Language', 'Plotline', 'Runtime', 'Kind',
       'Seasons', 'Netflix', 'Hulu', 'Prime Video', 'Disney+', 'Type'],
      dtype='object')
**************************************************
Datatype of Columns : 
 ID                   int64
Title               object
Year                 int64
Age                 object
IMDb               float64
Rotten Tomatoes     object
Directors           object
Cast                object
Genres              object
Country             object
Language            object
Plotline            object
Runtime            float64
Kind                object
Seasons            float64
Netflix              int64
Hulu                 int64
Prime Video          int64
Disney+              int64
Type                 int64
dtype: object
**************************************************
Missing Values : 
Age                 8457
IMDb                 328
Rotten Tomatoes    10437
Directors            357
Cast                 648
Genres               234
Country              303
Language             437
Plotline            4958
Runtime              382
Seasons            16923
dtype: int64
**************************************************
Missing vaules %age wise :

ID                   0.000000
Title                0.000000
Year                 0.000000
Age                 49.973409
IMDb                 1.938191
Rotten Tomatoes     61.673462
Directors            2.109555
Cast                 3.829108
Genres               1.382734
Country              1.790463
Language             2.582284
Plotline            29.297406
Runtime              2.257283
Kind                 0.000000
Seasons            100.000000
Netflix              0.000000
Hulu                 0.000000
Prime Video          0.000000
Disney+              0.000000
Type                 0.000000
dtype: float64
**************************************************
Pictorial Representation : 
In [9]:
# ID
# df_movies = df_movies.drop(['ID'], axis = 1)
 
# Age
df_movies.loc[df_movies['Age'].isnull() & df_movies['Disney+'] == 1, "Age"] = '13'
# df_movies.fillna({'Age' : 18}, inplace = True)
df_movies.fillna({'Age' : 'NR'}, inplace = True)
df_movies['Age'].replace({'all': '0'}, inplace = True)
df_movies['Age'].replace({'7+': '7'}, inplace = True)
df_movies['Age'].replace({'13+': '13'}, inplace = True)
df_movies['Age'].replace({'16+': '16'}, inplace = True)
df_movies['Age'].replace({'18+': '18'}, inplace = True)
# df_movies['Age'] = df_movies['Age'].astype(int)
 
# IMDb
# df_movies.fillna({'IMDb' : df_movies['IMDb'].mean()}, inplace = True)
# df_movies.fillna({'IMDb' : df_movies['IMDb'].median()}, inplace = True)
df_movies.fillna({'IMDb' : "NA"}, inplace = True)
 
# Rotten Tomatoes
df_movies['Rotten Tomatoes'] = df_movies['Rotten Tomatoes'][df_movies['Rotten Tomatoes'].notnull()].str.replace('%', '').astype(int)
# df_movies['Rotten Tomatoes'] = df_movies['Rotten Tomatoes'][df_movies['Rotten Tomatoes'].notnull()].astype(int)
# df_movies.fillna({'Rotten Tomatoes' : df_movies['Rotten Tomatoes'].mean()}, inplace = True)
# df_movies.fillna({'Rotten Tomatoes' : df_movies['Rotten Tomatoes'].median()}, inplace = True)
# df_movies['Rotten Tomatoes'] = df_movies['Rotten Tomatoes'].astype(int)
df_movies.fillna({'Rotten Tomatoes' : "NA"}, inplace = True)
 
# Directors
# df_movies = df_movies.drop(['Directors'], axis = 1)
df_movies.fillna({'Directors' : "NA"}, inplace = True)
 
# Cast
df_movies.fillna({'Cast' : "NA"}, inplace = True)
 
# Genres
df_movies.fillna({'Genres': "NA"}, inplace = True)
 
# Country
df_movies.fillna({'Country': "NA"}, inplace = True)
 
# Language
df_movies.fillna({'Language': "NA"}, inplace = True)
 
# Plotline
df_movies.fillna({'Plotline': "NA"}, inplace = True)
 
# Runtime
# df_movies.fillna({'Runtime' : df_movies['Runtime'].mean()}, inplace = True)
# df_movies['Runtime'] = df_movies['Runtime'].astype(int)
df_movies.fillna({'Runtime' : "NA"}, inplace = True)
 
# Kind
# df_movies.fillna({'Kind': "NA"}, inplace = True)
 
# Type
# df_movies.fillna({'Type': "NA"}, inplace = True)
# df_movies = df_movies.drop(['Type'], axis = 1)
 
# Seasons
# df_movies.fillna({'Seasons': 1}, inplace = True)
# df_movies.fillna({'Seasons': "NA"}, inplace = True)
df_movies = df_movies.drop(['Seasons'], axis = 1)
# df_movies['Seasons'] = df_movies['Seasons'].astype(int)
# df_movies.fillna({'Seasons' : df_movies['Seasons'].mean()}, inplace = True)
# df_movies['Seasons'] = df_movies['Seasons'].astype(int)
 
# Service Provider
df_movies['Service Provider'] = df_movies.loc[:, ['Netflix', 'Prime Video', 'Disney+', 'Hulu']].idxmax(axis = 1)
# df_movies.drop(['Netflix','Prime Video','Disney+','Hulu'], axis = 1)

# Removing Duplicate and Missing Entries
df_movies.dropna(how = 'any', inplace = True)
df_movies.drop_duplicates(inplace = True)
In [10]:
data_investigate(df_movies)
No of Rows :  16923
No of Coloums :  20
**************************************************
Colums Names : 
 Index(['ID', 'Title', 'Year', 'Age', 'IMDb', 'Rotten Tomatoes', 'Directors',
       'Cast', 'Genres', 'Country', 'Language', 'Plotline', 'Runtime', 'Kind',
       'Netflix', 'Hulu', 'Prime Video', 'Disney+', 'Type',
       'Service Provider'],
      dtype='object')
**************************************************
Datatype of Columns : 
 ID                   int64
Title               object
Year                 int64
Age                 object
IMDb                object
Rotten Tomatoes     object
Directors           object
Cast                object
Genres              object
Country             object
Language            object
Plotline            object
Runtime             object
Kind                object
Netflix              int64
Hulu                 int64
Prime Video          int64
Disney+              int64
Type                 int64
Service Provider    object
dtype: object
**************************************************
Missing Values : 
Series([], dtype: int64)
**************************************************
Missing vaules %age wise :

ID                  0.0
Title               0.0
Year                0.0
Age                 0.0
IMDb                0.0
Rotten Tomatoes     0.0
Directors           0.0
Cast                0.0
Genres              0.0
Country             0.0
Language            0.0
Plotline            0.0
Runtime             0.0
Kind                0.0
Netflix             0.0
Hulu                0.0
Prime Video         0.0
Disney+             0.0
Type                0.0
Service Provider    0.0
dtype: float64
**************************************************
Pictorial Representation : 
In [11]:
df_movies.head()
Out[11]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country Language Plotline Runtime Kind Netflix Hulu Prime Video Disney+ Type Service Provider
0 1 Inception 2010 13 8.8 87 Christopher Nolan Leonardo DiCaprio,Joseph Gordon-Levitt,Elliot ... Action,Adventure,Sci-Fi,Thriller United States,United Kingdom English,Japanese,French Dom Cobb is a skilled thief, the absolute best... 148 movie 1 0 0 0 0 Netflix
1 2 The Matrix 1999 16 8.7 88 Lana Wachowski,Lilly Wachowski Keanu Reeves,Laurence Fishburne,Carrie-Anne Mo... Action,Sci-Fi United States English Thomas A. Anderson is a man living two lives. ... 136 movie 1 0 0 0 0 Netflix
2 3 Avengers: Infinity War 2018 13 8.4 85 Anthony Russo,Joe Russo Robert Downey Jr.,Chris Hemsworth,Mark Ruffalo... Action,Adventure,Sci-Fi United States English As the Avengers and their allies have continue... 149 movie 1 0 0 0 0 Netflix
3 4 Back to the Future 1985 7 8.5 96 Robert Zemeckis Michael J. Fox,Christopher Lloyd,Lea Thompson,... Adventure,Comedy,Sci-Fi United States English Marty McFly, a typical American teenager of th... 116 movie 1 0 0 0 0 Netflix
4 5 The Good, the Bad and the Ugly 1966 16 8.8 97 Sergio Leone Eli Wallach,Clint Eastwood,Lee Van Cleef,Aldo ... Western Italy,Spain,West Germany,United States Italian Blondie (The Good) (Clint Eastwood) is a profe... 161 movie 1 0 1 0 0 Netflix
In [12]:
df_movies.describe()
Out[12]:
ID Year Netflix Hulu Prime Video Disney+ Type
count 16923.000000 16923.000000 16923.000000 16923.000000 16923.000000 16923.000000 16923.0
mean 8462.000000 2003.211901 0.214915 0.062637 0.727235 0.033150 0.0
std 4885.393638 20.526532 0.410775 0.242315 0.445394 0.179034 0.0
min 1.000000 1901.000000 0.000000 0.000000 0.000000 0.000000 0.0
25% 4231.500000 2001.000000 0.000000 0.000000 0.000000 0.000000 0.0
50% 8462.000000 2012.000000 0.000000 0.000000 1.000000 0.000000 0.0
75% 12692.500000 2016.000000 0.000000 0.000000 1.000000 0.000000 0.0
max 16923.000000 2020.000000 1.000000 1.000000 1.000000 1.000000 0.0
In [13]:
df_movies.corr()
Out[13]:
ID Year Netflix Hulu Prime Video Disney+ Type
ID 1.000000 -0.217816 -0.644470 -0.129926 0.469301 0.263530 NaN
Year -0.217816 1.000000 0.256151 0.101337 -0.255578 -0.047258 NaN
Netflix -0.644470 0.256151 1.000000 -0.118032 -0.745141 -0.089649 NaN
Hulu -0.129926 0.101337 -0.118032 1.000000 -0.284654 -0.039693 NaN
Prime Video 0.469301 -0.255578 -0.745141 -0.284654 1.000000 -0.289008 NaN
Disney+ 0.263530 -0.047258 -0.089649 -0.039693 -0.289008 1.000000 NaN
Type NaN NaN NaN NaN NaN NaN NaN
In [14]:
# df_movies.sort_values('Year', ascending = True)
# df_movies.sort_values('IMDb', ascending = False)
In [15]:
# df_movies.to_csv(path_or_buf= '/content/drive/MyDrive/Files/updated_ottmovies.csv', index = False)
 
# path = '/content/drive/MyDrive/Files/'
 
# udf_movies = pd.read_csv(path + 'updated_ottmovies.csv')
 
# udf_movies
In [16]:
# df_netflix_movies = df_movies.loc[(df_movies['Netflix'] > 0)]
# df_hulu_movies = df_movies.loc[(df_movies['Hulu'] > 0)]
# df_prime_video_movies = df_movies.loc[(df_movies['Prime Video'] > 0)]
# df_disney_movies = df_movies.loc[(df_movies['Disney+'] > 0)]
In [17]:
df_netflix_only_movies = df_movies[(df_movies['Netflix'] == 1) & (df_movies['Hulu'] == 0) & (df_movies['Prime Video'] == 0 ) & (df_movies['Disney+'] == 0)]
df_hulu_only_movies = df_movies[(df_movies['Netflix'] == 0) & (df_movies['Hulu'] == 1) & (df_movies['Prime Video'] == 0 ) & (df_movies['Disney+'] == 0)]
df_prime_video_only_movies = df_movies[(df_movies['Netflix'] == 0) & (df_movies['Hulu'] == 0) & (df_movies['Prime Video'] == 1 ) & (df_movies['Disney+'] == 0)]
df_disney_only_movies = df_movies[(df_movies['Netflix'] == 0) & (df_movies['Hulu'] == 0) & (df_movies['Prime Video'] == 0 ) & (df_movies['Disney+'] == 1)]
In [18]:
df_movies_runtimes = df_movies.copy()
In [19]:
df_movies_runtimes.drop(df_movies_runtimes.loc[df_movies_runtimes['Runtime'] == "NA"].index, inplace = True)
# df_movies_runtimes = df_movies_runtimes[df_movies_runtimes.Runtime != "NA"]
df_movies_runtimes['Runtime'] = df_movies_runtimes['Runtime'].astype(int)
In [20]:
# Creating distinct dataframes only with the movies present on individual streaming platforms
netflix_runtimes_movies = df_movies_runtimes.loc[df_movies_runtimes['Netflix'] == 1]
hulu_runtimes_movies = df_movies_runtimes.loc[df_movies_runtimes['Hulu'] == 1]
prime_video_runtimes_movies = df_movies_runtimes.loc[df_movies_runtimes['Prime Video'] == 1]
disney_runtimes_movies = df_movies_runtimes.loc[df_movies_runtimes['Disney+'] == 1]
In [21]:
df_movies_runtimes_group = df_movies_runtimes.copy()
In [22]:
df_movies_screentimes = df_movies_runtimes.copy()
df_movies_screentimes['Screentime'] = round(df_movies_runtimes['Runtime']/60, ndigits = 2)
In [23]:
# Creating distinct dataframes only with the movies present on individual streaming platforms
netflix_screentimes_movies = df_movies_screentimes.loc[df_movies_screentimes['Netflix'] == 1]
hulu_screentimes_movies = df_movies_screentimes.loc[df_movies_screentimes['Hulu'] == 1]
prime_video_screentimes_movies = df_movies_screentimes.loc[df_movies_screentimes['Prime Video'] == 1]
disney_screentimes_movies = df_movies_screentimes.loc[df_movies_screentimes['Disney+'] == 1]
In [24]:
plt.figure(figsize = (10, 10))
corr = df_movies_runtimes.corr()
# Plot figsize
fig, ax = plt.subplots(figsize=(10, 8))
# Generate Heat Map, allow annotations and place floats in map
sns.heatmap(corr, cmap = 'magma', annot = True, fmt = ".2f")
# Apply xticks
plt.xticks(range(len(corr.columns)), corr.columns);
# Apply yticks
plt.yticks(range(len(corr.columns)), corr.columns)
# show plot
plt.show()
fig.show()
<Figure size 720x720 with 0 Axes>
In [25]:
df_runtimes_high_movies = df_movies_runtimes.sort_values(by = 'Runtime', ascending = False).reset_index()
df_runtimes_high_movies = df_runtimes_high_movies.drop(['index'], axis = 1)
# filter = (df_movies_runtimes['Runtime'] == (df_movies_runtimes['Runtime'].max()))
# df_runtimes_high_movies = df_movies_runtimes[filter]
 
# highest_rated_movies = df_movies_runtimes.loc[df_movies_runtimes['Runtime'].idxmax()]
 
print('\nMovies with Highest Ever Runtime  are : \n')
df_runtimes_high_movies.head(5)
Movies with Highest Ever Runtime  are : 

Out[25]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country Language Plotline Runtime Kind Netflix Hulu Prime Video Disney+ Type Service Provider
0 16741 The Remarkable 20th Century 2004 NR 7.8 NA Scott Popjes,Steven Vosburgh Howard K. Smith,Jimmy Hodson Documentary United States NA This four-part series takes an in-depth look a... 600 movie 0 0 1 0 0 Prime Video
1 16720 The Ultimate Civil War Series: 150th Anniversa... 2012 NR 6.9 NA Kevin R. Hershberger Steve Alexander,Randy Allen,Coby Batty,Scott W... Documentary,Action,Drama,History,War United States English NA 353 movie 0 0 1 0 0 Prime Video
2 12686 Custer's Last Stand 1936 NR 4.7 NA Elmer Clifton Rex Lease,Lona Andre,William Farnum,Ruth Mix,J... Adventure,History,Romance,War,Western United States English A cruel and ruthless bandit kills a tavern own... 328 movie 0 0 1 0 0 Prime Video
3 3755 Dina 2017 13 6.8 98 Denis Villeneuve Zendaya,Rebecca Ferguson,Jason Momoa,Dave Baut... Adventure,Drama,Sci-Fi Canada,Hungary,United States English A mythic and emotionally charged hero's journe... 265 movie 0 1 0 0 0 Hulu
4 5520 The Greatest Story Ever Told 1965 0 6.6 41 George Stevens,David Lean,Jean Negulesco Max von Sydow,Michael Anderson Jr.,Carroll Bak... Biography,Drama,History United States English At 30, Patrick O'Brien was TransFatty, a New Y... 260 movie 0 0 1 0 0 Prime Video
In [26]:
fig = px.bar(y = df_runtimes_high_movies['Title'][:15],
             x = df_runtimes_high_movies['Runtime'][:15], 
             color = df_runtimes_high_movies['Runtime'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'Runtime : In Minutes'},
             title  = 'Movies with Highest Runtime in Minutes : All Platforms')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [27]:
df_runtimes_low_movies = df_movies_runtimes.sort_values(by = 'Runtime', ascending = True).reset_index()
df_runtimes_low_movies = df_runtimes_low_movies.drop(['index'], axis = 1)
# filter = (df_movies_runtimes['Runtime'] == (df_movies_runtimes['Runtime'].min()))
# df_runtimes_low_movies = df_movies_runtimes[filter]

print('\nMovies with Lowest Ever Runtime  are : \n')
df_runtimes_low_movies.head(5)
Movies with Lowest Ever Runtime  are : 

Out[27]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country Language Plotline Runtime Kind Netflix Hulu Prime Video Disney+ Type Service Provider
0 13905 Thanksgiving 2014 NR 7.7 100 Eli Roth Mark Bakunas,Vendula Bednarova,Chris Briggs,Da... Short,Comedy,Horror United States English Anthony Dexter---bare-chested most of the film... 2 movie 0 0 1 0 0 Prime Video
1 15583 Jurassic Africa 2018 NR 6.6 NA NA Rick Carter,Gerald R. Molen,Steven Spielberg Short United States English Baseball Hall of Famer Reggie Jackson provides... 2 movie 0 0 1 0 0 Prime Video
2 15971 Luxo Jr. 1986 0 7.3 NA John Lasseter NA Animation,Short,Family United States None Alameda Slim (Randy Quaid), a wanted cattle ru... 2 movie 0 0 0 1 0 Disney+
3 14728 #LoveSwag 2015 13 4.4 NA Austin Davoren Giovanni Watson,Yaritza Betancourt,Shaun Royer Short,Comedy,Drama,Romance United States English Dead bodies are being found in the New York ha... 2 movie 0 0 1 0 0 Prime Video
4 16572 A Brief History 2016 NR 7.5 NA Ion Popescu-Gopo NA Animation,Short Romania Romanian Three-part series, Around the Way, celebrates ... 3 movie 0 1 0 0 0 Hulu
In [28]:
fig = px.bar(y = df_runtimes_low_movies['Title'][:15],
             x = df_runtimes_low_movies['Runtime'][:15], 
             color = df_runtimes_low_movies['Runtime'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'Runtime : In Minutes'},
             title  = 'Movies with Lowest Runtime in Minutes : All Platforms')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [29]:
print(f'''
      Total '{df_movies_runtimes['Runtime'].unique().shape[0]}' unique Runtime s were Given, They were Like this,\n
      
{df_movies_runtimes.sort_values(by = 'Runtime', ascending = False)['Runtime'].unique()}\n
 
      The Highest Ever Runtime Ever Any Movie Got is '{df_runtimes_high_movies['Title'][0]}' : '{df_runtimes_high_movies['Runtime'].max()}'\n
 
      The Lowest Ever Runtime Ever Any Movie Got is '{df_runtimes_low_movies['Title'][0]}' : '{df_runtimes_low_movies['Runtime'].min()}'\n
      ''')
      Total '220' unique Runtime s were Given, They were Like this,

      
[600 353 328 265 260 259 258 256 255 242 240 238 233 227 224 220 216 215
 213 212 210 209 206 204 201 200 197 195 194 193 192 191 189 188 187 186
 185 184 183 182 181 180 179 178 177 176 175 174 173 172 171 170 169 168
 167 166 165 164 163 162 161 160 159 158 157 156 155 154 153 152 151 150
 149 148 147 146 145 144 143 142 141 140 139 138 137 136 135 134 133 132
 131 130 129 128 127 126 125 124 123 122 121 120 119 118 117 116 115 114
 113 112 111 110 109 108 107 106 105 104 103 102 101 100  99  98  97  96
  95  94  93  92  91  90  89  88  87  86  85  84  83  82  81  80  79  78
  77  76  75  74  73  72  71  70  69  68  67  66  65  64  63  62  61  60
  59  58  57  56  55  54  53  52  51  50  49  48  47  46  45  44  43  42
  41  40  39  38  37  36  35  34  33  32  31  30  29  28  27  26  25  24
  23  22  21  20  19  18  17  16  15  14  13  12  11  10   9   8   7   6
   5   4   3   2]

 
      The Highest Ever Runtime Ever Any Movie Got is 'The Remarkable 20th Century' : '600'

 
      The Lowest Ever Runtime Ever Any Movie Got is 'Thanksgiving' : '2'

      
In [30]:
netflix_runtimes_high_movies = df_runtimes_high_movies.loc[df_runtimes_high_movies['Netflix']==1].reset_index()
netflix_runtimes_high_movies = netflix_runtimes_high_movies.drop(['index'], axis = 1)
 
netflix_runtimes_low_movies = df_runtimes_low_movies.loc[df_runtimes_low_movies['Netflix']==1].reset_index()
netflix_runtimes_low_movies = netflix_runtimes_low_movies.drop(['index'], axis = 1)
 
netflix_runtimes_high_movies.head(5)
Out[30]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country Language Plotline Runtime Kind Netflix Hulu Prime Video Disney+ Type Service Provider
0 2207 The Gospel of Matthew 2014 0 7.7 NA Regardt van den Bergh Richard Kiley,Bruce Marchiano,Gerrit Schoonhov... Biography,Drama,History South Africa English Matthew 15:1 - 28:20 - The year is about 62 A.... 258 movie 1 0 0 0 0 Netflix
1 80 Lagaan: Once Upon a Time in India 2001 7 8.1 95 Ashutosh Gowariker Aamir Khan,Gracy Singh,Rachel Shelley,Paul Bla... Drama,Musical,Sport India,United Kingdom Hindi,English This is the story about the resilience shown b... 224 movie 1 0 0 0 0 Netflix
2 2248 Jatt James Bond 2014 7 6.7 NA Rohit Jugraj Gippy Grewal,Zareen Khan,Gurpreet Ghuggi,Yashp... Comedy India Punjabi NA 220 movie 1 0 0 0 0 Netflix
3 2485 The Gospel of Luke 2015 NR 7.1 NA David Batty Selva Rasalingam,Karima Gouit,Mourad Zaoui,El ... Drama United States,United Kingdom,Morocco English,Spanish NA 215 movie 1 0 0 0 0 Netflix
4 368 Jodhaa Akbar 2008 13 7.6 75 Ashutosh Gowariker Hrithik Roshan,Aishwarya Rai Bachchan,Sonu Soo... Action,Drama,History,Romance,War India Hindi,Urdu Jodhaa Akbar is a sixteenth century love story... 213 movie 1 0 0 0 0 Netflix
In [31]:
fig = px.bar(y = netflix_runtimes_high_movies['Title'][:15],
             x = netflix_runtimes_high_movies['Runtime'][:15], 
             color = netflix_runtimes_high_movies['Runtime'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'Runtime : In Minutes'},
             title  = 'Movies with Highest Runtime in Minutes : Netflix')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [32]:
fig = px.bar(y = netflix_runtimes_low_movies['Title'][:15],
             x = netflix_runtimes_low_movies['Runtime'][:15], 
             color = netflix_runtimes_low_movies['Runtime'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'Runtime : In Minutes'},
             title  = 'Movies with Lowest Runtime in Minutes : Netflix')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [33]:
hulu_runtimes_high_movies = df_runtimes_high_movies.loc[df_runtimes_high_movies['Hulu']==1].reset_index()
hulu_runtimes_high_movies = hulu_runtimes_high_movies.drop(['index'], axis = 1)
 
hulu_runtimes_low_movies = df_runtimes_low_movies.loc[df_runtimes_low_movies['Hulu']==1].reset_index()
hulu_runtimes_low_movies = hulu_runtimes_low_movies.drop(['index'], axis = 1)
 
hulu_runtimes_high_movies.head(5)
Out[33]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country Language Plotline Runtime Kind Netflix Hulu Prime Video Disney+ Type Service Provider
0 3755 Dina 2017 13 6.8 98 Denis Villeneuve Zendaya,Rebecca Ferguson,Jason Momoa,Dave Baut... Adventure,Drama,Sci-Fi Canada,Hungary,United States English A mythic and emotionally charged hero's journe... 265 movie 0 1 0 0 0 Hulu
1 3977 Dark Shadows: The Haunting of Collinwood 2009 7 7.7 NA NA Joan Bennett,Thayer David,Louis Edmonds,Jonath... Drama,Fantasy,Horror United States English NA 210 movie 0 1 1 0 0 Prime Video
2 4201 Dark Shadows: The Vampire Curse 2009 NR 7.7 NA NA Joan Bennett,Thayer David,Louis Edmonds,Jonath... Drama,Fantasy,Horror United States English NA 210 movie 0 1 1 0 0 Prime Video
3 3464 The Green Mile 1999 16 8.6 78 Frank Darabont Tom Hanks,David Morse,Bonnie Hunt,Michael Clar... Crime,Drama,Fantasy,Mystery United States English,French Death Row guards at a penitentiary, in the 193... 189 movie 0 1 0 0 0 Hulu
4 16580 Fear Box 2018 13 6.2 NA Michael Bay Ben Affleck,Josh Hartnett,Kate Beckinsale,Will... Action,Drama,History,Romance,War United States English,Japanese,French NA 183 movie 0 1 0 0 0 Hulu
In [34]:
fig = px.bar(y = hulu_runtimes_high_movies['Title'][:15],
             x = hulu_runtimes_high_movies['Runtime'][:15], 
             color = hulu_runtimes_high_movies['Runtime'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'Runtime : In Minutes'},
             title  = 'Movies with Highest Runtime in Minutes : Hulu')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [35]:
fig = px.bar(y = hulu_runtimes_low_movies['Title'][:15],
             x = hulu_runtimes_low_movies['Runtime'][:15], 
             color = hulu_runtimes_low_movies['Runtime'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'Runtime : In Minutes'},
             title  = 'Movies with Lowest Runtime in Minutes : Hulu')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [36]:
prime_video_runtimes_high_movies = df_runtimes_high_movies.loc[df_runtimes_high_movies['Prime Video']==1].reset_index()
prime_video_runtimes_high_movies = prime_video_runtimes_high_movies.drop(['index'], axis = 1)
 
prime_video_runtimes_low_movies = df_runtimes_low_movies.loc[df_runtimes_low_movies['Prime Video']==1].reset_index()
prime_video_runtimes_low_movies = prime_video_runtimes_low_movies.drop(['index'], axis = 1)
 
prime_video_runtimes_high_movies.head(5)
Out[36]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country Language Plotline Runtime Kind Netflix Hulu Prime Video Disney+ Type Service Provider
0 16741 The Remarkable 20th Century 2004 NR 7.8 NA Scott Popjes,Steven Vosburgh Howard K. Smith,Jimmy Hodson Documentary United States NA This four-part series takes an in-depth look a... 600 movie 0 0 1 0 0 Prime Video
1 16720 The Ultimate Civil War Series: 150th Anniversa... 2012 NR 6.9 NA Kevin R. Hershberger Steve Alexander,Randy Allen,Coby Batty,Scott W... Documentary,Action,Drama,History,War United States English NA 353 movie 0 0 1 0 0 Prime Video
2 12686 Custer's Last Stand 1936 NR 4.7 NA Elmer Clifton Rex Lease,Lona Andre,William Farnum,Ruth Mix,J... Adventure,History,Romance,War,Western United States English A cruel and ruthless bandit kills a tavern own... 328 movie 0 0 1 0 0 Prime Video
3 5520 The Greatest Story Ever Told 1965 0 6.6 41 George Stevens,David Lean,Jean Negulesco Max von Sydow,Michael Anderson Jr.,Carroll Bak... Biography,Drama,History United States English At 30, Patrick O'Brien was TransFatty, a New Y... 260 movie 0 0 1 0 0 Prime Video
4 4536 Tom Petty and the Heartbreakers: Runnin' Down ... 2007 NR 8.6 100 Peter Bogdanovich Neil Armstrong,Mick Avory,Ron Blair,Peter Bogd... Documentary,Music United States English NA 259 movie 0 0 1 0 0 Prime Video
In [37]:
fig = px.bar(y = prime_video_runtimes_high_movies['Title'][:15],
             x = prime_video_runtimes_high_movies['Runtime'][:15], 
             color = prime_video_runtimes_high_movies['Runtime'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'Runtime : In Minutes'},
             title  = 'Movies with Highest Runtime in Minutes : Prime Video')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [38]:
fig = px.bar(y = prime_video_runtimes_low_movies['Title'][:15],
             x = prime_video_runtimes_low_movies['Runtime'][:15], 
             color = prime_video_runtimes_low_movies['Runtime'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'Runtime : In Minutes'},
             title  = 'Movies with Lowest Runtime in Minutes : Prime Video')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [39]:
disney_runtimes_high_movies = df_runtimes_high_movies.loc[df_runtimes_high_movies['Disney+']==1].reset_index()
disney_runtimes_high_movies = disney_runtimes_high_movies.drop(['index'], axis = 1)
 
disney_runtimes_low_movies = df_runtimes_low_movies.loc[df_runtimes_low_movies['Disney+']==1].reset_index()
disney_runtimes_low_movies = disney_runtimes_low_movies.drop(['index'], axis = 1)
 
disney_runtimes_high_movies.head(5)
Out[39]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country Language Plotline Runtime Kind Netflix Hulu Prime Video Disney+ Type Service Provider
0 15735 Avengers: Endgame 2019 13 8.4 94 Anthony Russo,Joe Russo Robert Downey Jr.,Chris Evans,Mark Ruffalo,Chr... Action,Adventure,Drama,Sci-Fi United States English,Japanese,Xhosa,German An elderly man reads the book "The Princess Br... 181 movie 0 0 0 1 0 Disney+
1 15774 The Sound of Music 1965 0 8 83 Robert Wise Julie Andrews,Christopher Plummer,Eleanor Park... Biography,Drama,Family,Musical,Romance United States English,German In this animated comedy from the folks at Disn... 172 movie 0 0 0 1 0 Disney+
2 15803 Pirates of the Caribbean: At World's End 2007 13 7.1 44 Gore Verbinski Johnny Depp,Geoffrey Rush,Orlando Bloom,Keira ... Action,Adventure,Fantasy United States English The Good Dinosaur asks the question: What if t... 169 movie 0 0 0 1 0 Disney+
3 15970 Around the World in 80 Days 2004 0 6.8 32 Michael Anderson,John Farrow Cantinflas,Finlay Currie,Robert Morley,Ronald ... Adventure,Comedy,Family,Romance United States English,Spanish,French Race car driver, Jim Douglas goes to Monte Car... 167 movie 0 0 0 1 0 Disney+
4 15793 Star Wars: The Last Jedi 2017 13 7 90 Rian Johnson Mark Hamill,Carrie Fisher,Adam Driver,Daisy Ri... Action,Adventure,Fantasy,Sci-Fi United States English While living the quiet life in a swamp, Kermit... 152 movie 0 0 0 1 0 Disney+
In [40]:
fig = px.bar(y = disney_runtimes_high_movies['Title'][:15],
             x = disney_runtimes_high_movies['Runtime'][:15], 
             color = disney_runtimes_high_movies['Runtime'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'Runtime : In Minutes'},
             title  = 'Movies with Highest Runtime in Minutes : Disney+')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [41]:
fig = px.bar(y = disney_runtimes_low_movies['Title'][:15],
             x = disney_runtimes_low_movies['Runtime'][:15], 
             color = disney_runtimes_low_movies['Runtime'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'Runtime : In Minutes'},
             title  = 'Movies with Lowest Runtime in Minutes : Disney+')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [42]:
print(f'''
      The Movie with Highest Runtime  Ever Got is '{df_runtimes_high_movies['Title'][0]}' : '{df_runtimes_high_movies['Runtime'].max()}'\n
      The Movie with Lowest Runtime  Ever Got is '{df_runtimes_low_movies['Title'][0]}' : '{df_runtimes_low_movies['Runtime'].min()}'\n
      
      The Movie with Highest Runtime  on 'Netflix' is '{netflix_runtimes_high_movies['Title'][0]}' : '{netflix_runtimes_high_movies['Runtime'].max()}'\n
      The Movie with Lowest Runtime  on 'Netflix' is '{netflix_runtimes_low_movies['Title'][0]}' : '{netflix_runtimes_low_movies['Runtime'].min()}'\n
      
      The Movie with Highest Runtime  on 'Hulu' is '{hulu_runtimes_high_movies['Title'][0]}' : '{hulu_runtimes_high_movies['Runtime'].max()}'\n
      The Movie with Lowest Runtime  on 'Hulu' is '{hulu_runtimes_low_movies['Title'][0]}' : '{hulu_runtimes_low_movies['Runtime'].min()}'\n
      
      The Movie with Highest Runtime  on 'Prime Video' is '{prime_video_runtimes_high_movies['Title'][0]}' : '{prime_video_runtimes_high_movies['Runtime'].max()}'\n
      The Movie with Lowest Runtime  on 'Prime Video' is '{prime_video_runtimes_low_movies['Title'][0]}' : '{prime_video_runtimes_low_movies['Runtime'].min()}'\n
      
      The Movie with Highest Runtime  on 'Disney+' is '{disney_runtimes_high_movies['Title'][0]}' : '{disney_runtimes_high_movies['Runtime'].max()}'\n
      The Movie with Lowest Runtime  on 'Disney+' is '{disney_runtimes_low_movies['Title'][0]}' : '{disney_runtimes_low_movies['Runtime'].min()}'\n 
      ''')
      The Movie with Highest Runtime  Ever Got is 'The Remarkable 20th Century' : '600'

      The Movie with Lowest Runtime  Ever Got is 'Thanksgiving' : '2'

      
      The Movie with Highest Runtime  on 'Netflix' is 'The Gospel of Matthew' : '258'

      The Movie with Lowest Runtime  on 'Netflix' is 'Silent' : '3'

      
      The Movie with Highest Runtime  on 'Hulu' is 'Dina' : '265'

      The Movie with Lowest Runtime  on 'Hulu' is 'A Brief History' : '3'

      
      The Movie with Highest Runtime  on 'Prime Video' is 'The Remarkable 20th Century' : '600'

      The Movie with Lowest Runtime  on 'Prime Video' is 'Thanksgiving' : '2'

      
      The Movie with Highest Runtime  on 'Disney+' is 'Avengers: Endgame' : '181'

      The Movie with Lowest Runtime  on 'Disney+' is 'Luxo Jr.' : '2'
 
      
In [43]:
print(f'''
      Accross All Platforms the Average Runtime  is '{round(df_movies_runtimes['Runtime'].mean(), ndigits = 2)}'\n
      The Average Runtime  on 'Netflix' is '{round(netflix_runtimes_movies['Runtime'].mean(), ndigits = 2)}'\n
      The Average Runtime  on 'Hulu' is '{round(hulu_runtimes_movies['Runtime'].mean(), ndigits = 2)}'\n
      The Average Runtime  on 'Prime Video' is '{round(prime_video_runtimes_movies['Runtime'].mean(), ndigits = 2)}'\n
      The Average Runtime  on 'Disney+' is '{round(disney_runtimes_movies['Runtime'].mean(), ndigits = 2)}'\n 
      ''')
      Accross All Platforms the Average Runtime  is '94.08'

      The Average Runtime  on 'Netflix' is '100.04'

      The Average Runtime  on 'Hulu' is '97.49'

      The Average Runtime  on 'Prime Video' is '92.71'

      The Average Runtime  on 'Disney+' is '91.59'
 
      
In [44]:
f, ax = plt.subplots(1, 2 , figsize = (20, 5))
sns.distplot(df_movies_runtimes['Runtime'],bins = 20, kde = True, ax = ax[0])
sns.boxplot(df_movies_runtimes['Runtime'], ax = ax[1])
plt.show()
In [45]:
# Defining plot size and title
plt.figure(figsize = (20, 5))
plt.title('Runtime s Per Platform')
 
# Plotting the information from each dataset into a histogram
sns.histplot(prime_video_runtimes_movies['Runtime'][:100], color = 'lightblue', legend = True, kde = True)
sns.histplot(netflix_runtimes_movies['Runtime'][:100], color = 'red', legend = True, kde = True)
sns.histplot(hulu_runtimes_movies['Runtime'][:100], color = 'lightgreen', legend = True, kde = True)
sns.histplot(disney_runtimes_movies['Runtime'][:100], color = 'darkblue', legend = True, kde = True) 
 
# Setting the legend
plt.legend(['Prime Video', 'Netflix', 'Hulu', 'Disney+'])
plt.show()
In [46]:
def round_val(data):
    if str(data) != 'nan':
        return round(data)
        
def round_fix(data):
    if data in range(0,51):
        # print(data)
        return 50
    if data in range(51,101):
        return 100
    if data in range(101,151):
        return 150
    if data in range(151,201):
        return 200
    if data in range(201,251):
        return 250
    if data in range(251,301):
        return 300
    if data in range(301,351):
        return 350
    if data in range(351,401):
        return 400
    if data in range(401,451):
        return 450
    if data in range(451,501):
        return 500
    if data in range(501,551):
        return 550
    if data in range(551,601):
        return 600
    if data in range(601,651):
        return 650
    if data in range(651,701):
        return 700
    if data in range(701,751):
        return 750
    if data in range(751,801):
        return 800
    if data in range(801,851):
        return 850
    if data in range(851,901):
        return 900
    if data in range(901,951):
        return 950
    if data in range(951,1001):
        return 1000
    if data in range(1001,1051):
        return 1050
    if data in range(1051,1101):
        return 1100
    if data in range(1101,1151):
        return 1150
    if data in range(1151,1201):
        return 1200
    if data in range(1201,1251):
        return 1250
    if data in range(1251,1301):
        return 1300
    if data in range(1301,1351):
        return 1350
    if data in range(1351,2001):
        return 2000
In [47]:
df_movies_runtimes_group['Runtime Group'] = df_movies_runtimes['Runtime'].apply(round_fix)
 
runtimes_values = df_movies_runtimes_group['Runtime Group'].value_counts().sort_index(ascending = False).tolist()
runtimes_index = df_movies_runtimes_group['Runtime Group'].value_counts().sort_index(ascending = False).index
 
# runtimes_values, runtimes_index
In [48]:
runtimes_group_count = df_movies_runtimes_group.groupby('Runtime Group')['Title'].count()
runtimes_group_movies = df_movies_runtimes_group.groupby('Runtime Group')[['Netflix', 'Hulu', 'Prime Video', 'Disney+']].sum()
runtimes_group_data_movies = pd.concat([runtimes_group_count, runtimes_group_movies], axis = 1).reset_index().rename(columns = {'Title' : 'Movies Count'})
runtimes_group_data_movies = runtimes_group_data_movies.sort_values(by = 'Movies Count', ascending = False)
In [49]:
# Runtime Group with Movies Counts - All Platforms Combined
runtimes_group_data_movies.sort_values(by = 'Movies Count', ascending = False)
Out[49]:
Runtime Group Movies Count Netflix Hulu Prime Video Disney+
1 100 10787 1788 616 8328 330
2 150 4641 1415 380 2978 169
0 50 636 144 28 442 46
3 200 444 144 15 311 7
4 250 24 9 2 17 0
5 300 6 1 1 4 0
6 350 1 0 0 1 0
7 400 1 0 0 1 0
8 600 1 0 0 1 0
In [50]:
runtimes_group_data_movies.sort_values(by = 'Runtime Group', ascending = False)
Out[50]:
Runtime Group Movies Count Netflix Hulu Prime Video Disney+
8 600 1 0 0 1 0
7 400 1 0 0 1 0
6 350 1 0 0 1 0
5 300 6 1 1 4 0
4 250 24 9 2 17 0
3 200 444 144 15 311 7
2 150 4641 1415 380 2978 169
1 100 10787 1788 616 8328 330
0 50 636 144 28 442 46
In [51]:
fig = px.bar(y = runtimes_group_data_movies['Movies Count'],
             x = runtimes_group_data_movies['Runtime Group'], 
             color = runtimes_group_data_movies['Runtime Group'],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies Count', 'x' : 'Runtime : In Minutes'},
             title  = 'Movies with Group Runtime in Minutes : All Platforms')

fig.update_layout(plot_bgcolor = "white")
fig.show()
In [52]:
fig = px.pie(runtimes_group_data_movies[:10],
             names = runtimes_group_data_movies['Runtime Group'],
             values = runtimes_group_data_movies['Movies Count'],
             color = runtimes_group_data_movies['Movies Count'],
             color_discrete_sequence = px.colors.sequential.Teal)

fig.update_traces(textinfo = 'percent+label',
                  title = 'Movies Count based on Runtime Group')
fig.show()
In [53]:
df_runtimes_group_high_movies = runtimes_group_data_movies.sort_values(by = 'Movies Count', ascending = False).reset_index()
df_runtimes_group_high_movies = df_runtimes_group_high_movies.drop(['index'], axis = 1)
# filter = (runtimes_group_data_movies['Movies Count'] ==  (runtimes_group_data_movies['Movies Count'].max()))
# df_runtimes_group_high_movies = runtimes_group_data_movies[filter]
 
# highest_rated_movies = runtimes_group_data_movies.loc[runtimes_group_data_movies['Movies Count'].idxmax()]
 
# print('\nRuntime with Highest Ever Movies Count are : All Platforms Combined\n')
df_runtimes_group_high_movies.head(5)
Out[53]:
Runtime Group Movies Count Netflix Hulu Prime Video Disney+
0 100 10787 1788 616 8328 330
1 150 4641 1415 380 2978 169
2 50 636 144 28 442 46
3 200 444 144 15 311 7
4 250 24 9 2 17 0
In [54]:
df_runtimes_group_low_movies = runtimes_group_data_movies.sort_values(by = 'Movies Count', ascending = True).reset_index()
df_runtimes_group_low_movies = df_runtimes_group_low_movies.drop(['index'], axis = 1)
# filter = (runtimes_group_data_movies['Movies Count'] = =  (runtimes_group_data_movies['Movies Count'].min()))
# df_runtimes_group_low_movies = runtimes_group_data_movies[filter]
 
# print('\nRuntime with Lowest Ever Movies Count are : All Platforms Combined\n')
df_runtimes_group_low_movies.head(5)
Out[54]:
Runtime Group Movies Count Netflix Hulu Prime Video Disney+
0 350 1 0 0 1 0
1 400 1 0 0 1 0
2 600 1 0 0 1 0
3 300 6 1 1 4 0
4 250 24 9 2 17 0
In [55]:
print(f'''
      Total '{df_movies_runtimes['Runtime'].count()}' Titles are available on All Platforms, out of which\n
      You Can Choose to see Movies from Total '{runtimes_group_data_movies['Runtime Group'].unique().shape[0]}' Runtime Group, They were Like this, \n
 
      {runtimes_group_data_movies.sort_values(by = 'Movies Count', ascending = False)['Runtime Group'].unique()} etc. \n
 
      The Runtime Group with Highest Movies Count have '{runtimes_group_data_movies['Movies Count'].max()}' Movies Available is '{df_runtimes_group_high_movies['Runtime Group'][0]}', &\n
      The Runtime Group with Lowest Movies Count have '{runtimes_group_data_movies['Movies Count'].min()}' Movies Available is '{df_runtimes_group_low_movies['Runtime Group'][0]}'
      ''')
      Total '16541' Titles are available on All Platforms, out of which

      You Can Choose to see Movies from Total '9' Runtime Group, They were Like this, 

 
      [100 150  50 200 250 300 350 400 600] etc. 

 
      The Runtime Group with Highest Movies Count have '10787' Movies Available is '100', &

      The Runtime Group with Lowest Movies Count have '1' Movies Available is '350'
      
In [56]:
netflix_runtimes_group_movies = runtimes_group_data_movies[runtimes_group_data_movies['Netflix'] !=  0].sort_values(by = 'Netflix', ascending = False).reset_index()
netflix_runtimes_group_movies = netflix_runtimes_group_movies.drop(['index', 'Hulu', 'Prime Video', 'Disney+', 'Movies Count'], axis = 1)
 
netflix_runtimes_group_high_movies = df_runtimes_group_high_movies.sort_values(by = 'Netflix', ascending = False).reset_index()
netflix_runtimes_group_high_movies = netflix_runtimes_group_high_movies.drop(['index'], axis = 1)
 
netflix_runtimes_group_low_movies = df_runtimes_group_high_movies.sort_values(by = 'Netflix', ascending = True).reset_index()
netflix_runtimes_group_low_movies = netflix_runtimes_group_low_movies.drop(['index'], axis = 1)
 
netflix_runtimes_group_high_movies.head(5)
Out[56]:
Runtime Group Movies Count Netflix Hulu Prime Video Disney+
0 100 10787 1788 616 8328 330
1 150 4641 1415 380 2978 169
2 50 636 144 28 442 46
3 200 444 144 15 311 7
4 250 24 9 2 17 0
In [57]:
hulu_runtimes_group_movies = runtimes_group_data_movies[runtimes_group_data_movies['Hulu'] !=  0].sort_values(by = 'Hulu', ascending = False).reset_index()
hulu_runtimes_group_movies = hulu_runtimes_group_movies.drop(['index', 'Netflix', 'Prime Video', 'Disney+', 'Movies Count'], axis = 1)
 
hulu_runtimes_group_high_movies = df_runtimes_group_high_movies.sort_values(by = 'Hulu', ascending = False).reset_index()
hulu_runtimes_group_high_movies = hulu_runtimes_group_high_movies.drop(['index'], axis = 1)
 
hulu_runtimes_group_low_movies = df_runtimes_group_high_movies.sort_values(by = 'Hulu', ascending = True).reset_index()
hulu_runtimes_group_low_movies = hulu_runtimes_group_low_movies.drop(['index'], axis = 1)
 
hulu_runtimes_group_high_movies.head(5)
Out[57]:
Runtime Group Movies Count Netflix Hulu Prime Video Disney+
0 100 10787 1788 616 8328 330
1 150 4641 1415 380 2978 169
2 50 636 144 28 442 46
3 200 444 144 15 311 7
4 250 24 9 2 17 0
In [58]:
prime_video_runtimes_group_movies = runtimes_group_data_movies[runtimes_group_data_movies['Prime Video'] !=  0].sort_values(by = 'Prime Video', ascending = False).reset_index()
prime_video_runtimes_group_movies = prime_video_runtimes_group_movies.drop(['index', 'Netflix', 'Hulu', 'Disney+', 'Movies Count'], axis = 1)
 
prime_video_runtimes_group_high_movies = df_runtimes_group_high_movies.sort_values(by = 'Prime Video', ascending = False).reset_index()
prime_video_runtimes_group_high_movies = prime_video_runtimes_group_high_movies.drop(['index'], axis = 1)
 
prime_video_runtimes_group_low_movies = df_runtimes_group_high_movies.sort_values(by = 'Prime Video', ascending = True).reset_index()
prime_video_runtimes_group_low_movies = prime_video_runtimes_group_low_movies.drop(['index'], axis = 1)
 
prime_video_runtimes_group_high_movies.head(5)
Out[58]:
Runtime Group Movies Count Netflix Hulu Prime Video Disney+
0 100 10787 1788 616 8328 330
1 150 4641 1415 380 2978 169
2 50 636 144 28 442 46
3 200 444 144 15 311 7
4 250 24 9 2 17 0
In [59]:
disney_runtimes_group_movies = runtimes_group_data_movies[runtimes_group_data_movies['Disney+'] !=  0].sort_values(by = 'Disney+', ascending = False).reset_index()
disney_runtimes_group_movies = disney_runtimes_group_movies.drop(['index', 'Netflix', 'Hulu', 'Prime Video', 'Movies Count'], axis = 1)
 
disney_runtimes_group_high_movies = df_runtimes_group_high_movies.sort_values(by = 'Disney+', ascending = False).reset_index()
disney_runtimes_group_high_movies = disney_runtimes_group_high_movies.drop(['index'], axis = 1)
 
disney_runtimes_group_low_movies = df_runtimes_group_high_movies.sort_values(by = 'Disney+', ascending = True).reset_index()
disney_runtimes_group_low_movies = disney_runtimes_group_low_movies.drop(['index'], axis = 1)
 
disney_runtimes_group_high_movies.head(5)
Out[59]:
Runtime Group Movies Count Netflix Hulu Prime Video Disney+
0 100 10787 1788 616 8328 330
1 150 4641 1415 380 2978 169
2 50 636 144 28 442 46
3 200 444 144 15 311 7
4 250 24 9 2 17 0
In [60]:
print(f'''
      The Runtime Group with Highest Movies Count Ever Got is '{df_runtimes_group_high_movies['Runtime Group'][0]}' : '{df_runtimes_group_high_movies['Movies Count'].max()}'\n
      The Runtime Group with Lowest Movies Count Ever Got is '{df_runtimes_group_low_movies['Runtime Group'][0]}' : '{df_runtimes_group_low_movies['Movies Count'].min()}'\n
      
      The Runtime Group with Highest Movies Count on 'Netflix' is '{netflix_runtimes_group_high_movies['Runtime Group'][0]}' : '{netflix_runtimes_group_high_movies['Netflix'].max()}'\n
      The Runtime Group with Lowest Movies Count on 'Netflix' is '{netflix_runtimes_group_low_movies['Runtime Group'][0]}' : '{netflix_runtimes_group_low_movies['Netflix'].min()}'\n
      
      The Runtime Group with Highest Movies Count on 'Hulu' is '{hulu_runtimes_group_high_movies['Runtime Group'][0]}' : '{hulu_runtimes_group_high_movies['Hulu'].max()}'\n
      The Runtime Group with Lowest Movies Count on 'Hulu' is '{hulu_runtimes_group_low_movies['Runtime Group'][0]}' : '{hulu_runtimes_group_low_movies['Hulu'].min()}'\n
      
      The Runtime Group with Highest Movies Count on 'Prime Video' is '{prime_video_runtimes_group_high_movies['Runtime Group'][0]}' : '{prime_video_runtimes_group_high_movies['Prime Video'].max()}'\n
      The Runtime Group with Lowest Movies Count on 'Prime Video' is '{prime_video_runtimes_group_low_movies['Runtime Group'][0]}' : '{prime_video_runtimes_group_low_movies['Prime Video'].min()}'\n
      
      The Runtime Group with Highest Movies Count on 'Disney+' is '{disney_runtimes_group_high_movies['Runtime Group'][0]}' : '{disney_runtimes_group_high_movies['Disney+'].max()}'\n
      The Runtime Group with Lowest Movies Count on 'Disney+' is '{disney_runtimes_group_low_movies['Runtime Group'][0]}' : '{disney_runtimes_group_low_movies['Disney+'].min()}'\n 
      ''')
      The Runtime Group with Highest Movies Count Ever Got is '100' : '10787'

      The Runtime Group with Lowest Movies Count Ever Got is '350' : '1'

      
      The Runtime Group with Highest Movies Count on 'Netflix' is '100' : '1788'

      The Runtime Group with Lowest Movies Count on 'Netflix' is '350' : '0'

      
      The Runtime Group with Highest Movies Count on 'Hulu' is '100' : '616'

      The Runtime Group with Lowest Movies Count on 'Hulu' is '350' : '0'

      
      The Runtime Group with Highest Movies Count on 'Prime Video' is '100' : '8328'

      The Runtime Group with Lowest Movies Count on 'Prime Video' is '350' : '1'

      
      The Runtime Group with Highest Movies Count on 'Disney+' is '100' : '330'

      The Runtime Group with Lowest Movies Count on 'Disney+' is '250' : '0'
 
      
In [61]:
fig, axes = plt.subplots(2, 2, figsize = (20 , 20))
 
n_ru_ax1 = sns.barplot(x = netflix_runtimes_group_movies['Runtime Group'][:10], y = netflix_runtimes_group_movies['Netflix'][:10], palette = 'Reds_r', ax = axes[0, 0])
h_ru_ax2 = sns.barplot(x = hulu_runtimes_group_movies['Runtime Group'][:10], y = hulu_runtimes_group_movies['Hulu'][:10], palette = 'Greens_r', ax = axes[0, 1])
p_ru_ax3 = sns.barplot(x = prime_video_runtimes_group_movies['Runtime Group'][:10], y = prime_video_runtimes_group_movies['Prime Video'][:10], palette = 'Blues_r', ax = axes[1, 0])
d_ru_ax4 = sns.barplot(x = disney_runtimes_group_movies['Runtime Group'][:10], y = disney_runtimes_group_movies['Disney+'][:10], palette = 'BuPu_r', ax = axes[1, 1])
 
labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']
 
n_ru_ax1.title.set_text(labels[0])
h_ru_ax2.title.set_text(labels[1])
p_ru_ax3.title.set_text(labels[2])
d_ru_ax4.title.set_text(labels[3])
 
plt.show()
In [62]:
plt.figure(figsize = (20, 5))
sns.lineplot(x = runtimes_group_data_movies['Runtime Group'], y = runtimes_group_data_movies['Netflix'], color = 'red')
sns.lineplot(x = runtimes_group_data_movies['Runtime Group'], y = runtimes_group_data_movies['Hulu'], color = 'lightgreen')
sns.lineplot(x = runtimes_group_data_movies['Runtime Group'], y = runtimes_group_data_movies['Prime Video'], color = 'lightblue')
sns.lineplot(x = runtimes_group_data_movies['Runtime Group'], y = runtimes_group_data_movies['Disney+'], color = 'darkblue')
plt.xlabel('Runtime Group', fontsize = 15)
plt.ylabel('Movies Count', fontsize = 15)
plt.show()
In [63]:
print(f'''
      Accross All Platforms Total Count of Runtime Group is '{runtimes_group_data_movies['Runtime Group'].unique().shape[0]}'\n
      Total Count of Runtime Group on 'Netflix' is '{netflix_runtimes_group_movies['Runtime Group'].unique().shape[0]}'\n
      Total Count of Runtime Group on 'Hulu' is '{hulu_runtimes_group_movies['Runtime Group'].unique().shape[0]}'\n
      Total Count of Runtime Group on 'Prime Video' is '{prime_video_runtimes_group_movies['Runtime Group'].unique().shape[0]}'\n
      Total Count of Runtime Group on 'Disney+' is '{disney_runtimes_group_movies['Runtime Group'].unique().shape[0]}'\n 
      ''')
      Accross All Platforms Total Count of Runtime Group is '9'

      Total Count of Runtime Group on 'Netflix' is '6'

      Total Count of Runtime Group on 'Hulu' is '6'

      Total Count of Runtime Group on 'Prime Video' is '9'

      Total Count of Runtime Group on 'Disney+' is '4'
 
      
In [64]:
fig, axes = plt.subplots(2, 2, figsize = (20 , 20))
 
n_ru_ax1 = sns.lineplot(y = runtimes_group_data_movies['Runtime Group'], x = runtimes_group_data_movies['Netflix'], color = 'red', ax = axes[0, 0])
h_ru_ax2 = sns.lineplot(y = runtimes_group_data_movies['Runtime Group'], x = runtimes_group_data_movies['Hulu'], color = 'lightgreen', ax = axes[0, 1])
p_ru_ax3 = sns.lineplot(y = runtimes_group_data_movies['Runtime Group'], x = runtimes_group_data_movies['Prime Video'], color = 'lightblue', ax = axes[1, 0])
d_ru_ax4 = sns.lineplot(y = runtimes_group_data_movies['Runtime Group'], x = runtimes_group_data_movies['Disney+'], color = 'darkblue', ax = axes[1, 1])
 
labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']
 
n_ru_ax1.title.set_text(labels[0])
h_ru_ax2.title.set_text(labels[1])
p_ru_ax3.title.set_text(labels[2])
d_ru_ax4.title.set_text(labels[3])

plt.show()
In [65]:
fig, axes = plt.subplots(2, 2, figsize = (20 , 20))
 
n_ru_ax1 = sns.barplot(x = runtimes_group_data_movies['Runtime Group'][:10], y = runtimes_group_data_movies['Netflix'][:10], palette = 'Reds_r', ax = axes[0, 0])
h_ru_ax2 = sns.barplot(x = runtimes_group_data_movies['Runtime Group'][:10], y = runtimes_group_data_movies['Hulu'][:10], palette = 'Greens_r', ax = axes[0, 1])
p_ru_ax3 = sns.barplot(x = runtimes_group_data_movies['Runtime Group'][:10], y = runtimes_group_data_movies['Prime Video'][:10], palette = 'Blues_r', ax = axes[1, 0])
d_ru_ax4 = sns.barplot(x = runtimes_group_data_movies['Runtime Group'][:10], y = runtimes_group_data_movies['Disney+'][:10], palette = 'BuPu_r', ax = axes[1, 1])
 
labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']
 
n_ru_ax1.title.set_text(labels[0])
h_ru_ax2.title.set_text(labels[1])
p_ru_ax3.title.set_text(labels[2])
d_ru_ax4.title.set_text(labels[3])
 
plt.show()
In [66]:
df_screentimes_high_movies = df_movies_screentimes.sort_values(by = 'Screentime', ascending = False).reset_index()
df_screentimes_high_movies = df_screentimes_high_movies.drop(['index'], axis = 1)
# filter = (df_movies_screentimes['Screentime'] == (df_movies_screentimes['Screentime'].max()))
# df_screentimes_high_movies = df_movies_screentimes[filter]
 
# highest_rated_movies = df_movies_screentimes.loc[df_movies_screentimes['Screentime'].idxmax()]
 
print('\nMovies with Highest Ever Screentime  are : \n')
df_screentimes_high_movies.head(5)
Movies with Highest Ever Screentime  are : 

Out[66]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country ... Plotline Runtime Kind Netflix Hulu Prime Video Disney+ Type Service Provider Screentime
0 16741 The Remarkable 20th Century 2004 NR 7.8 NA Scott Popjes,Steven Vosburgh Howard K. Smith,Jimmy Hodson Documentary United States ... This four-part series takes an in-depth look a... 600 movie 0 0 1 0 0 Prime Video 10.00
1 16720 The Ultimate Civil War Series: 150th Anniversa... 2012 NR 6.9 NA Kevin R. Hershberger Steve Alexander,Randy Allen,Coby Batty,Scott W... Documentary,Action,Drama,History,War United States ... NA 353 movie 0 0 1 0 0 Prime Video 5.88
2 12686 Custer's Last Stand 1936 NR 4.7 NA Elmer Clifton Rex Lease,Lona Andre,William Farnum,Ruth Mix,J... Adventure,History,Romance,War,Western United States ... A cruel and ruthless bandit kills a tavern own... 328 movie 0 0 1 0 0 Prime Video 5.47
3 3755 Dina 2017 13 6.8 98 Denis Villeneuve Zendaya,Rebecca Ferguson,Jason Momoa,Dave Baut... Adventure,Drama,Sci-Fi Canada,Hungary,United States ... A mythic and emotionally charged hero's journe... 265 movie 0 1 0 0 0 Hulu 4.42
4 5520 The Greatest Story Ever Told 1965 0 6.6 41 George Stevens,David Lean,Jean Negulesco Max von Sydow,Michael Anderson Jr.,Carroll Bak... Biography,Drama,History United States ... At 30, Patrick O'Brien was TransFatty, a New Y... 260 movie 0 0 1 0 0 Prime Video 4.33

5 rows × 21 columns

In [67]:
fig = px.bar(y = df_screentimes_high_movies['Title'][:15],
             x = df_screentimes_high_movies['Screentime'][:15], 
             color = df_screentimes_high_movies['Screentime'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'Screentime : In Hours'},
             title  = 'Movies with Highest Screentime in Hours : All Platforms')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [68]:
df_screentimes_low_movies = df_movies_screentimes.sort_values(by = 'Screentime', ascending = True).reset_index()
df_screentimes_low_movies = df_screentimes_low_movies.drop(['index'], axis = 1)
# filter = (df_movies_screentimes['Screentime'] == (df_movies_screentimes['Screentime'].min()))
# df_screentimes_low_movies = df_movies_screentimes[filter]

print('\nMovies with Lowest Ever Screentime  are : \n')
df_screentimes_low_movies.head(5)
Movies with Lowest Ever Screentime  are : 

Out[68]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country ... Plotline Runtime Kind Netflix Hulu Prime Video Disney+ Type Service Provider Screentime
0 13905 Thanksgiving 2014 NR 7.7 100 Eli Roth Mark Bakunas,Vendula Bednarova,Chris Briggs,Da... Short,Comedy,Horror United States ... Anthony Dexter---bare-chested most of the film... 2 movie 0 0 1 0 0 Prime Video 0.03
1 15583 Jurassic Africa 2018 NR 6.6 NA NA Rick Carter,Gerald R. Molen,Steven Spielberg Short United States ... Baseball Hall of Famer Reggie Jackson provides... 2 movie 0 0 1 0 0 Prime Video 0.03
2 15971 Luxo Jr. 1986 0 7.3 NA John Lasseter NA Animation,Short,Family United States ... Alameda Slim (Randy Quaid), a wanted cattle ru... 2 movie 0 0 0 1 0 Disney+ 0.03
3 14728 #LoveSwag 2015 13 4.4 NA Austin Davoren Giovanni Watson,Yaritza Betancourt,Shaun Royer Short,Comedy,Drama,Romance United States ... Dead bodies are being found in the New York ha... 2 movie 0 0 1 0 0 Prime Video 0.03
4 16572 A Brief History 2016 NR 7.5 NA Ion Popescu-Gopo NA Animation,Short Romania ... Three-part series, Around the Way, celebrates ... 3 movie 0 1 0 0 0 Hulu 0.05

5 rows × 21 columns

In [69]:
fig = px.bar(y = df_screentimes_low_movies['Title'][:15],
             x = df_screentimes_low_movies['Screentime'][:15], 
             color = df_screentimes_low_movies['Screentime'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'Screentime : In Hours'},
             title  = 'Movies with Lowest Screentime in Hours : All Platforms')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [70]:
print(f'''
      Total '{df_movies_screentimes['Screentime'].unique().shape[0]}' unique Screentime s were Given, They were Like this,\n
      
{df_movies_screentimes.sort_values(by = 'Screentime', ascending = False)['Screentime'].unique()}\n
 
      The Highest Ever Screentime Ever Any Movie Got is '{df_screentimes_high_movies['Title'][0]}' : '{df_screentimes_high_movies['Screentime'].max()}'\n
 
      The Lowest Ever Screentime Ever Any Movie Got is '{df_screentimes_low_movies['Title'][0]}' : '{df_screentimes_low_movies['Screentime'].min()}'\n
      ''')
      Total '220' unique Screentime s were Given, They were Like this,

      
[10.    5.88  5.47  4.42  4.33  4.32  4.3   4.27  4.25  4.03  4.    3.97
  3.88  3.78  3.73  3.67  3.6   3.58  3.55  3.53  3.5   3.48  3.43  3.4
  3.35  3.33  3.28  3.25  3.23  3.22  3.2   3.18  3.15  3.13  3.12  3.1
  3.08  3.07  3.05  3.03  3.02  3.    2.98  2.97  2.95  2.93  2.92  2.9
  2.88  2.87  2.85  2.83  2.82  2.8   2.78  2.77  2.75  2.73  2.72  2.7
  2.68  2.67  2.65  2.63  2.62  2.6   2.58  2.57  2.55  2.53  2.52  2.5
  2.48  2.47  2.45  2.43  2.42  2.4   2.38  2.37  2.35  2.33  2.32  2.3
  2.28  2.27  2.25  2.23  2.22  2.2   2.18  2.17  2.15  2.13  2.12  2.1
  2.08  2.07  2.05  2.03  2.02  2.    1.98  1.97  1.95  1.93  1.92  1.9
  1.88  1.87  1.85  1.83  1.82  1.8   1.78  1.77  1.75  1.73  1.72  1.7
  1.68  1.67  1.65  1.63  1.62  1.6   1.58  1.57  1.55  1.53  1.52  1.5
  1.48  1.47  1.45  1.43  1.42  1.4   1.38  1.37  1.35  1.33  1.32  1.3
  1.28  1.27  1.25  1.23  1.22  1.2   1.18  1.17  1.15  1.13  1.12  1.1
  1.08  1.07  1.05  1.03  1.02  1.    0.98  0.97  0.95  0.93  0.92  0.9
  0.88  0.87  0.85  0.83  0.82  0.8   0.78  0.77  0.75  0.73  0.72  0.7
  0.68  0.67  0.65  0.63  0.62  0.6   0.58  0.57  0.55  0.53  0.52  0.5
  0.48  0.47  0.45  0.43  0.42  0.4   0.38  0.37  0.35  0.33  0.32  0.3
  0.28  0.27  0.25  0.23  0.22  0.2   0.18  0.17  0.15  0.13  0.12  0.1
  0.08  0.07  0.05  0.03]

 
      The Highest Ever Screentime Ever Any Movie Got is 'The Remarkable 20th Century' : '10.0'

 
      The Lowest Ever Screentime Ever Any Movie Got is 'Thanksgiving' : '0.03'

      
In [71]:
netflix_screentimes_high_movies = df_screentimes_high_movies.loc[df_screentimes_high_movies['Netflix']==1].reset_index()
netflix_screentimes_high_movies = netflix_screentimes_high_movies.drop(['index'], axis = 1)
 
netflix_screentimes_low_movies = df_screentimes_low_movies.loc[df_screentimes_low_movies['Netflix']==1].reset_index()
netflix_screentimes_low_movies = netflix_screentimes_low_movies.drop(['index'], axis = 1)
 
netflix_screentimes_high_movies.head(5)
Out[71]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country ... Plotline Runtime Kind Netflix Hulu Prime Video Disney+ Type Service Provider Screentime
0 2207 The Gospel of Matthew 2014 0 7.7 NA Regardt van den Bergh Richard Kiley,Bruce Marchiano,Gerrit Schoonhov... Biography,Drama,History South Africa ... Matthew 15:1 - 28:20 - The year is about 62 A.... 258 movie 1 0 0 0 0 Netflix 4.30
1 80 Lagaan: Once Upon a Time in India 2001 7 8.1 95 Ashutosh Gowariker Aamir Khan,Gracy Singh,Rachel Shelley,Paul Bla... Drama,Musical,Sport India,United Kingdom ... This is the story about the resilience shown b... 224 movie 1 0 0 0 0 Netflix 3.73
2 2248 Jatt James Bond 2014 7 6.7 NA Rohit Jugraj Gippy Grewal,Zareen Khan,Gurpreet Ghuggi,Yashp... Comedy India ... NA 220 movie 1 0 0 0 0 Netflix 3.67
3 2485 The Gospel of Luke 2015 NR 7.1 NA David Batty Selva Rasalingam,Karima Gouit,Mourad Zaoui,El ... Drama United States,United Kingdom,Morocco ... NA 215 movie 1 0 0 0 0 Netflix 3.58
4 368 Jodhaa Akbar 2008 13 7.6 75 Ashutosh Gowariker Hrithik Roshan,Aishwarya Rai Bachchan,Sonu Soo... Action,Drama,History,Romance,War India ... Jodhaa Akbar is a sixteenth century love story... 213 movie 1 0 0 0 0 Netflix 3.55

5 rows × 21 columns

In [72]:
fig = px.bar(y = netflix_screentimes_high_movies['Title'][:15],
             x = netflix_screentimes_high_movies['Screentime'][:15], 
             color = netflix_screentimes_high_movies['Screentime'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'Screentime : In Hours'},
             title  = 'Movies with Highest Screentime in Hours : Netflix')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [73]:
fig = px.bar(y = netflix_screentimes_low_movies['Title'][:15],
             x = netflix_screentimes_low_movies['Screentime'][:15], 
             color = netflix_screentimes_low_movies['Screentime'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'Screentime : In Hours'},
             title  = 'Movies with Lowest Screentime in Hours : Netflix')
			 
fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [74]:
hulu_screentimes_high_movies = df_screentimes_high_movies.loc[df_screentimes_high_movies['Hulu']==1].reset_index()
hulu_screentimes_high_movies = hulu_screentimes_high_movies.drop(['index'], axis = 1)
 
hulu_screentimes_low_movies = df_screentimes_low_movies.loc[df_screentimes_low_movies['Hulu']==1].reset_index()
hulu_screentimes_low_movies = hulu_screentimes_low_movies.drop(['index'], axis = 1)
 
hulu_screentimes_high_movies.head(5)
Out[74]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country ... Plotline Runtime Kind Netflix Hulu Prime Video Disney+ Type Service Provider Screentime
0 3755 Dina 2017 13 6.8 98 Denis Villeneuve Zendaya,Rebecca Ferguson,Jason Momoa,Dave Baut... Adventure,Drama,Sci-Fi Canada,Hungary,United States ... A mythic and emotionally charged hero's journe... 265 movie 0 1 0 0 0 Hulu 4.42
1 3977 Dark Shadows: The Haunting of Collinwood 2009 7 7.7 NA NA Joan Bennett,Thayer David,Louis Edmonds,Jonath... Drama,Fantasy,Horror United States ... NA 210 movie 0 1 1 0 0 Prime Video 3.50
2 4201 Dark Shadows: The Vampire Curse 2009 NR 7.7 NA NA Joan Bennett,Thayer David,Louis Edmonds,Jonath... Drama,Fantasy,Horror United States ... NA 210 movie 0 1 1 0 0 Prime Video 3.50
3 3464 The Green Mile 1999 16 8.6 78 Frank Darabont Tom Hanks,David Morse,Bonnie Hunt,Michael Clar... Crime,Drama,Fantasy,Mystery United States ... Death Row guards at a penitentiary, in the 193... 189 movie 0 1 0 0 0 Hulu 3.15
4 16580 Fear Box 2018 13 6.2 NA Michael Bay Ben Affleck,Josh Hartnett,Kate Beckinsale,Will... Action,Drama,History,Romance,War United States ... NA 183 movie 0 1 0 0 0 Hulu 3.05

5 rows × 21 columns

In [75]:
fig = px.bar(y = hulu_screentimes_high_movies['Title'][:15],
             x = hulu_screentimes_high_movies['Screentime'][:15], 
             color = hulu_screentimes_high_movies['Screentime'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'Screentime : In Hours'},
             title  = 'Movies with Highest Screentime in Hours : Hulu')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [76]:
fig = px.bar(y = hulu_screentimes_low_movies['Title'][:15],
             x = hulu_screentimes_low_movies['Screentime'][:15], 
             color = hulu_screentimes_low_movies['Screentime'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'Screentime : In Hours'},
             title  = 'Movies with Lowest Screentime in Hours : Hulu')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [77]:
prime_video_screentimes_high_movies = df_screentimes_high_movies.loc[df_screentimes_high_movies['Prime Video']==1].reset_index()
prime_video_screentimes_high_movies = prime_video_screentimes_high_movies.drop(['index'], axis = 1)
 
prime_video_screentimes_low_movies = df_screentimes_low_movies.loc[df_screentimes_low_movies['Prime Video']==1].reset_index()
prime_video_screentimes_low_movies = prime_video_screentimes_low_movies.drop(['index'], axis = 1)
 
prime_video_screentimes_high_movies.head(5)
Out[77]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country ... Plotline Runtime Kind Netflix Hulu Prime Video Disney+ Type Service Provider Screentime
0 16741 The Remarkable 20th Century 2004 NR 7.8 NA Scott Popjes,Steven Vosburgh Howard K. Smith,Jimmy Hodson Documentary United States ... This four-part series takes an in-depth look a... 600 movie 0 0 1 0 0 Prime Video 10.00
1 16720 The Ultimate Civil War Series: 150th Anniversa... 2012 NR 6.9 NA Kevin R. Hershberger Steve Alexander,Randy Allen,Coby Batty,Scott W... Documentary,Action,Drama,History,War United States ... NA 353 movie 0 0 1 0 0 Prime Video 5.88
2 12686 Custer's Last Stand 1936 NR 4.7 NA Elmer Clifton Rex Lease,Lona Andre,William Farnum,Ruth Mix,J... Adventure,History,Romance,War,Western United States ... A cruel and ruthless bandit kills a tavern own... 328 movie 0 0 1 0 0 Prime Video 5.47
3 5520 The Greatest Story Ever Told 1965 0 6.6 41 George Stevens,David Lean,Jean Negulesco Max von Sydow,Michael Anderson Jr.,Carroll Bak... Biography,Drama,History United States ... At 30, Patrick O'Brien was TransFatty, a New Y... 260 movie 0 0 1 0 0 Prime Video 4.33
4 4536 Tom Petty and the Heartbreakers: Runnin' Down ... 2007 NR 8.6 100 Peter Bogdanovich Neil Armstrong,Mick Avory,Ron Blair,Peter Bogd... Documentary,Music United States ... NA 259 movie 0 0 1 0 0 Prime Video 4.32

5 rows × 21 columns

In [78]:
fig = px.bar(y = prime_video_screentimes_high_movies['Title'][:15],
             x = prime_video_screentimes_high_movies['Screentime'][:15], 
             color = prime_video_screentimes_high_movies['Screentime'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'Screentime : In Hours'},
             title  = 'Movies with Highest Screentime in Hours : Prime Video')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [79]:
fig = px.bar(y = prime_video_screentimes_low_movies['Title'][:15],
             x = prime_video_screentimes_low_movies['Screentime'][:15], 
             color = prime_video_screentimes_low_movies['Screentime'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'Screentime : In Hours'},
             title  = 'Movies with Lowest Screentime in Hours : Prime Video')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [80]:
disney_screentimes_high_movies = df_screentimes_high_movies.loc[df_screentimes_high_movies['Disney+']==1].reset_index()
disney_screentimes_high_movies = disney_screentimes_high_movies.drop(['index'], axis = 1)
 
disney_screentimes_low_movies = df_screentimes_low_movies.loc[df_screentimes_low_movies['Disney+']==1].reset_index()
disney_screentimes_low_movies = disney_screentimes_low_movies.drop(['index'], axis = 1)
 
disney_screentimes_high_movies.head(5)
Out[80]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country ... Plotline Runtime Kind Netflix Hulu Prime Video Disney+ Type Service Provider Screentime
0 15735 Avengers: Endgame 2019 13 8.4 94 Anthony Russo,Joe Russo Robert Downey Jr.,Chris Evans,Mark Ruffalo,Chr... Action,Adventure,Drama,Sci-Fi United States ... An elderly man reads the book "The Princess Br... 181 movie 0 0 0 1 0 Disney+ 3.02
1 15774 The Sound of Music 1965 0 8 83 Robert Wise Julie Andrews,Christopher Plummer,Eleanor Park... Biography,Drama,Family,Musical,Romance United States ... In this animated comedy from the folks at Disn... 172 movie 0 0 0 1 0 Disney+ 2.87
2 15803 Pirates of the Caribbean: At World's End 2007 13 7.1 44 Gore Verbinski Johnny Depp,Geoffrey Rush,Orlando Bloom,Keira ... Action,Adventure,Fantasy United States ... The Good Dinosaur asks the question: What if t... 169 movie 0 0 0 1 0 Disney+ 2.82
3 15970 Around the World in 80 Days 2004 0 6.8 32 Michael Anderson,John Farrow Cantinflas,Finlay Currie,Robert Morley,Ronald ... Adventure,Comedy,Family,Romance United States ... Race car driver, Jim Douglas goes to Monte Car... 167 movie 0 0 0 1 0 Disney+ 2.78
4 15793 Star Wars: The Last Jedi 2017 13 7 90 Rian Johnson Mark Hamill,Carrie Fisher,Adam Driver,Daisy Ri... Action,Adventure,Fantasy,Sci-Fi United States ... While living the quiet life in a swamp, Kermit... 152 movie 0 0 0 1 0 Disney+ 2.53

5 rows × 21 columns

In [81]:
fig = px.bar(y = disney_screentimes_high_movies['Title'][:15],
             x = disney_screentimes_high_movies['Screentime'][:15], 
             color = disney_screentimes_high_movies['Screentime'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'Screentime : In Hours'},
             title  = 'Movies with Highest Screentime in Hours : Disney+')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [82]:
fig = px.bar(y = disney_screentimes_low_movies['Title'][:15],
             x = disney_screentimes_low_movies['Screentime'][:15], 
             color = disney_screentimes_low_movies['Screentime'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'Screentime : In Hours'},
             title  = 'Movies with Lowest Screentime in Hours : Disney+')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [83]:
print(f'''
      The Movie with Highest Screentime  Ever Got is '{df_screentimes_high_movies['Title'][0]}' : '{df_screentimes_high_movies['Screentime'].max()}'\n
      The Movie with Lowest Screentime  Ever Got is '{df_screentimes_low_movies['Title'][0]}' : '{df_screentimes_low_movies['Screentime'].min()}'\n
      
      The Movie with Highest Screentime  on 'Netflix' is '{netflix_screentimes_high_movies['Title'][0]}' : '{netflix_screentimes_high_movies['Screentime'].max()}'\n
      The Movie with Lowest Screentime  on 'Netflix' is '{netflix_screentimes_low_movies['Title'][0]}' : '{netflix_screentimes_low_movies['Screentime'].min()}'\n
      
      The Movie with Highest Screentime  on 'Hulu' is '{hulu_screentimes_high_movies['Title'][0]}' : '{hulu_screentimes_high_movies['Screentime'].max()}'\n
      The Movie with Lowest Screentime  on 'Hulu' is '{hulu_screentimes_low_movies['Title'][0]}' : '{hulu_screentimes_low_movies['Screentime'].min()}'\n
      
      The Movie with Highest Screentime  on 'Prime Video' is '{prime_video_screentimes_high_movies['Title'][0]}' : '{prime_video_screentimes_high_movies['Screentime'].max()}'\n
      The Movie with Lowest Screentime  on 'Prime Video' is '{prime_video_screentimes_low_movies['Title'][0]}' : '{prime_video_screentimes_low_movies['Screentime'].min()}'\n
      
      The Movie with Highest Screentime  on 'Disney+' is '{disney_screentimes_high_movies['Title'][0]}' : '{disney_screentimes_high_movies['Screentime'].max()}'\n
      The Movie with Lowest Screentime  on 'Disney+' is '{disney_screentimes_low_movies['Title'][0]}' : '{disney_screentimes_low_movies['Screentime'].min()}'\n 
      ''')
      The Movie with Highest Screentime  Ever Got is 'The Remarkable 20th Century' : '10.0'

      The Movie with Lowest Screentime  Ever Got is 'Thanksgiving' : '0.03'

      
      The Movie with Highest Screentime  on 'Netflix' is 'The Gospel of Matthew' : '4.3'

      The Movie with Lowest Screentime  on 'Netflix' is 'Silent' : '0.05'

      
      The Movie with Highest Screentime  on 'Hulu' is 'Dina' : '4.42'

      The Movie with Lowest Screentime  on 'Hulu' is 'A Brief History' : '0.05'

      
      The Movie with Highest Screentime  on 'Prime Video' is 'The Remarkable 20th Century' : '10.0'

      The Movie with Lowest Screentime  on 'Prime Video' is 'Thanksgiving' : '0.03'

      
      The Movie with Highest Screentime  on 'Disney+' is 'Avengers: Endgame' : '3.02'

      The Movie with Lowest Screentime  on 'Disney+' is 'Luxo Jr.' : '0.03'
 
      
In [84]:
print(f'''
      Accross All Platforms the Average Screentime  is '{round(df_movies_screentimes['Screentime'].mean(), ndigits = 2)}'\n
      The Average Screentime  on 'Netflix' is '{round(netflix_screentimes_movies['Screentime'].mean(), ndigits = 2)}'\n
      The Average Screentime  on 'Hulu' is '{round(hulu_screentimes_movies['Screentime'].mean(), ndigits = 2)}'\n
      The Average Screentime  on 'Prime Video' is '{round(prime_video_screentimes_movies['Screentime'].mean(), ndigits = 2)}'\n
      The Average Screentime  on 'Disney+' is '{round(disney_screentimes_movies['Screentime'].mean(), ndigits = 2)}'\n 
      ''')
      Accross All Platforms the Average Screentime  is '1.57'

      The Average Screentime  on 'Netflix' is '1.67'

      The Average Screentime  on 'Hulu' is '1.62'

      The Average Screentime  on 'Prime Video' is '1.55'

      The Average Screentime  on 'Disney+' is '1.53'
 
      
In [85]:
f, ax = plt.subplots(1, 2 , figsize = (20, 5))
sns.distplot(df_movies_screentimes['Screentime'],bins = 20, kde = True, ax = ax[0])
sns.boxplot(df_movies_screentimes['Screentime'], ax = ax[1])
plt.show()
In [86]:
# Defining plot size and title
plt.figure(figsize = (20, 10))
plt.title('Screentime s Per Platform')
 
# Plotting the information from each dataset into a histogram
sns.histplot(prime_video_screentimes_movies['Screentime'][:100], color = 'lightblue', legend = True, kde = True)
sns.histplot(netflix_screentimes_movies['Screentime'][:100], color = 'red', legend = True, kde = True)
sns.histplot(hulu_screentimes_movies['Screentime'][:100], color = 'lightgreen', legend = True, kde = True)
sns.histplot(disney_screentimes_movies['Screentime'][:100], color = 'darkblue', legend = True, kde = True) 
 
# Setting the legend
plt.legend(['Prime Video', 'Netflix', 'Hulu', 'Disney+'])
plt.show()
In [87]:
def round_val(data):
    if str(data) != 'nan':
        return round(data)
In [88]:
df_movies_screentimes_group = df_movies_screentimes.copy()
In [89]:
df_movies_screentimes_group['Screentime Group'] = df_movies_screentimes['Screentime'].apply(round_val)
 
screentimes_values = df_movies_screentimes_group['Screentime Group'].value_counts().sort_index(ascending = False).tolist()
screentimes_index = df_movies_screentimes_group['Screentime Group'].value_counts().sort_index(ascending = False).index
 
# screentimes_values, screentimes_index
In [90]:
screentimes_group_count = df_movies_screentimes_group.groupby('Screentime Group')['Title'].count()
screentimes_group_movies = df_movies_screentimes_group.groupby('Screentime Group')[['Netflix', 'Hulu', 'Prime Video', 'Disney+']].sum()
screentimes_group_data_movies = pd.concat([screentimes_group_count, screentimes_group_movies], axis = 1).reset_index().rename(columns = {'Title' : 'Movies Count'})
screentimes_group_data_movies = screentimes_group_data_movies.sort_values(by = 'Movies Count', ascending = False)
In [91]:
# Screentime Group with Movies Counts - All Platforms Combined
screentimes_group_data_movies.sort_values(by = 'Movies Count', ascending = False)
Out[91]:
Screentime Group Movies Count Netflix Hulu Prime Video Disney+
2 2 9573 2332 717 6681 303
1 1 6180 949 294 4841 225
3 3 449 147 15 315 7
0 0 311 66 13 226 17
4 4 25 7 3 17 0
5 5 1 0 0 1 0
6 6 1 0 0 1 0
7 10 1 0 0 1 0
In [92]:
screentimes_group_data_movies.sort_values(by = 'Screentime Group', ascending = False)
Out[92]:
Screentime Group Movies Count Netflix Hulu Prime Video Disney+
7 10 1 0 0 1 0
6 6 1 0 0 1 0
5 5 1 0 0 1 0
4 4 25 7 3 17 0
3 3 449 147 15 315 7
2 2 9573 2332 717 6681 303
1 1 6180 949 294 4841 225
0 0 311 66 13 226 17
In [93]:
fig = px.bar(y = screentimes_group_data_movies['Movies Count'],
             x = screentimes_group_data_movies['Screentime Group'], 
             color = screentimes_group_data_movies['Screentime Group'],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies Count', 'x' : 'Screentime : In Hours'},
             title  = 'Movies with Group Screentime in Hours : All Platforms')

fig.update_layout(plot_bgcolor = "white")
fig.show()
In [94]:
fig = px.pie(screentimes_group_data_movies[:10],
             names = screentimes_group_data_movies['Screentime Group'],
             values = screentimes_group_data_movies['Movies Count'],
             color = screentimes_group_data_movies['Movies Count'],
             color_discrete_sequence = px.colors.sequential.Teal)

fig.update_traces(textinfo = 'percent+label',
                  title = 'Movies Count based on Screentime Group')
fig.show()
In [95]:
df_screentimes_group_high_movies = screentimes_group_data_movies.sort_values(by = 'Movies Count', ascending = False).reset_index()
df_screentimes_group_high_movies = df_screentimes_group_high_movies.drop(['index'], axis = 1)
# filter = (screentimes_group_data_movies['Movies Count'] ==  (screentimes_group_data_movies['Movies Count'].max()))
# df_screentimes_group_high_movies = screentimes_group_data_movies[filter]
 
# highest_rated_movies = screentimes_group_data_movies.loc[screentimes_group_data_movies['Movies Count'].idxmax()]
 
# print('\nScreentime with Highest Ever Movies Count are : All Platforms Combined\n')
df_screentimes_group_high_movies.head(5)
Out[95]:
Screentime Group Movies Count Netflix Hulu Prime Video Disney+
0 2 9573 2332 717 6681 303
1 1 6180 949 294 4841 225
2 3 449 147 15 315 7
3 0 311 66 13 226 17
4 4 25 7 3 17 0
In [96]:
df_screentimes_group_low_movies = screentimes_group_data_movies.sort_values(by = 'Movies Count', ascending = True).reset_index()
df_screentimes_group_low_movies = df_screentimes_group_low_movies.drop(['index'], axis = 1)
# filter = (screentimes_group_data_movies['Movies Count'] = =  (screentimes_group_data_movies['Movies Count'].min()))
# df_screentimes_group_low_movies = screentimes_group_data_movies[filter]
 
# print('\nScreentime with Lowest Ever Movies Count are : All Platforms Combined\n')
df_screentimes_group_low_movies.head(5)
Out[96]:
Screentime Group Movies Count Netflix Hulu Prime Video Disney+
0 5 1 0 0 1 0
1 6 1 0 0 1 0
2 10 1 0 0 1 0
3 4 25 7 3 17 0
4 0 311 66 13 226 17
In [97]:
print(f'''
      Total '{df_movies_screentimes['Screentime'].count()}' Titles are available on All Platforms, out of which\n
      You Can Choose to see Movies from Total '{screentimes_group_data_movies['Screentime Group'].unique().shape[0]}' Screentime Group, They were Like this, \n
 
      {screentimes_group_data_movies.sort_values(by = 'Movies Count', ascending = False)['Screentime Group'].unique()} etc. \n
 
      The Screentime Group with Highest Movies Count have '{screentimes_group_data_movies['Movies Count'].max()}' Movies Available is '{df_screentimes_group_high_movies['Screentime Group'][0]}', &\n
      The Screentime Group with Lowest Movies Count have '{screentimes_group_data_movies['Movies Count'].min()}' Movies Available is '{df_screentimes_group_low_movies['Screentime Group'][0]}'
      ''')
      Total '16541' Titles are available on All Platforms, out of which

      You Can Choose to see Movies from Total '8' Screentime Group, They were Like this, 

 
      [ 2  1  3  0  4  5  6 10] etc. 

 
      The Screentime Group with Highest Movies Count have '9573' Movies Available is '2', &

      The Screentime Group with Lowest Movies Count have '1' Movies Available is '5'
      
In [98]:
netflix_screentimes_group_movies = screentimes_group_data_movies[screentimes_group_data_movies['Netflix'] !=  0].sort_values(by = 'Netflix', ascending = False).reset_index()
netflix_screentimes_group_movies = netflix_screentimes_group_movies.drop(['index', 'Hulu', 'Prime Video', 'Disney+', 'Movies Count'], axis = 1)
 
netflix_screentimes_group_high_movies = df_screentimes_group_high_movies.sort_values(by = 'Netflix', ascending = False).reset_index()
netflix_screentimes_group_high_movies = netflix_screentimes_group_high_movies.drop(['index'], axis = 1)
 
netflix_screentimes_group_low_movies = df_screentimes_group_high_movies.sort_values(by = 'Netflix', ascending = True).reset_index()
netflix_screentimes_group_low_movies = netflix_screentimes_group_low_movies.drop(['index'], axis = 1)
 
netflix_screentimes_group_high_movies.head(5)
Out[98]:
Screentime Group Movies Count Netflix Hulu Prime Video Disney+
0 2 9573 2332 717 6681 303
1 1 6180 949 294 4841 225
2 3 449 147 15 315 7
3 0 311 66 13 226 17
4 4 25 7 3 17 0
In [99]:
hulu_screentimes_group_movies = screentimes_group_data_movies[screentimes_group_data_movies['Hulu'] !=  0].sort_values(by = 'Hulu', ascending = False).reset_index()
hulu_screentimes_group_movies = hulu_screentimes_group_movies.drop(['index', 'Netflix', 'Prime Video', 'Disney+', 'Movies Count'], axis = 1)
 
hulu_screentimes_group_high_movies = df_screentimes_group_high_movies.sort_values(by = 'Hulu', ascending = False).reset_index()
hulu_screentimes_group_high_movies = hulu_screentimes_group_high_movies.drop(['index'], axis = 1)
 
hulu_screentimes_group_low_movies = df_screentimes_group_high_movies.sort_values(by = 'Hulu', ascending = True).reset_index()
hulu_screentimes_group_low_movies = hulu_screentimes_group_low_movies.drop(['index'], axis = 1)
 
hulu_screentimes_group_high_movies.head(5)
Out[99]:
Screentime Group Movies Count Netflix Hulu Prime Video Disney+
0 2 9573 2332 717 6681 303
1 1 6180 949 294 4841 225
2 3 449 147 15 315 7
3 0 311 66 13 226 17
4 4 25 7 3 17 0
In [100]:
prime_video_screentimes_group_movies = screentimes_group_data_movies[screentimes_group_data_movies['Prime Video'] !=  0].sort_values(by = 'Prime Video', ascending = False).reset_index()
prime_video_screentimes_group_movies = prime_video_screentimes_group_movies.drop(['index', 'Netflix', 'Hulu', 'Disney+', 'Movies Count'], axis = 1)
 
prime_video_screentimes_group_high_movies = df_screentimes_group_high_movies.sort_values(by = 'Prime Video', ascending = False).reset_index()
prime_video_screentimes_group_high_movies = prime_video_screentimes_group_high_movies.drop(['index'], axis = 1)
 
prime_video_screentimes_group_low_movies = df_screentimes_group_high_movies.sort_values(by = 'Prime Video', ascending = True).reset_index()
prime_video_screentimes_group_low_movies = prime_video_screentimes_group_low_movies.drop(['index'], axis = 1)
 
prime_video_screentimes_group_high_movies.head(5)
Out[100]:
Screentime Group Movies Count Netflix Hulu Prime Video Disney+
0 2 9573 2332 717 6681 303
1 1 6180 949 294 4841 225
2 3 449 147 15 315 7
3 0 311 66 13 226 17
4 4 25 7 3 17 0
In [101]:
disney_screentimes_group_movies = screentimes_group_data_movies[screentimes_group_data_movies['Disney+'] !=  0].sort_values(by = 'Disney+', ascending = False).reset_index()
disney_screentimes_group_movies = disney_screentimes_group_movies.drop(['index', 'Netflix', 'Hulu', 'Prime Video', 'Movies Count'], axis = 1)
 
disney_screentimes_group_high_movies = df_screentimes_group_high_movies.sort_values(by = 'Disney+', ascending = False).reset_index()
disney_screentimes_group_high_movies = disney_screentimes_group_high_movies.drop(['index'], axis = 1)
 
disney_screentimes_group_low_movies = df_screentimes_group_high_movies.sort_values(by = 'Disney+', ascending = True).reset_index()
disney_screentimes_group_low_movies = disney_screentimes_group_low_movies.drop(['index'], axis = 1)
 
disney_screentimes_group_high_movies.head(5)
Out[101]:
Screentime Group Movies Count Netflix Hulu Prime Video Disney+
0 2 9573 2332 717 6681 303
1 1 6180 949 294 4841 225
2 0 311 66 13 226 17
3 3 449 147 15 315 7
4 4 25 7 3 17 0
In [102]:
print(f'''
      The Screentime Group with Highest Movies Count Ever Got is '{df_screentimes_group_high_movies['Screentime Group'][0]}' : '{df_screentimes_group_high_movies['Movies Count'].max()}'\n
      The Screentime Group with Lowest Movies Count Ever Got is '{df_screentimes_group_low_movies['Screentime Group'][0]}' : '{df_screentimes_group_low_movies['Movies Count'].min()}'\n
      
      The Screentime Group with Highest Movies Count on 'Netflix' is '{netflix_screentimes_group_high_movies['Screentime Group'][0]}' : '{netflix_screentimes_group_high_movies['Netflix'].max()}'\n
      The Screentime Group with Lowest Movies Count on 'Netflix' is '{netflix_screentimes_group_low_movies['Screentime Group'][0]}' : '{netflix_screentimes_group_low_movies['Netflix'].min()}'\n
      
      The Screentime Group with Highest Movies Count on 'Hulu' is '{hulu_screentimes_group_high_movies['Screentime Group'][0]}' : '{hulu_screentimes_group_high_movies['Hulu'].max()}'\n
      The Screentime Group with Lowest Movies Count on 'Hulu' is '{hulu_screentimes_group_low_movies['Screentime Group'][0]}' : '{hulu_screentimes_group_low_movies['Hulu'].min()}'\n
      
      The Screentime Group with Highest Movies Count on 'Prime Video' is '{prime_video_screentimes_group_high_movies['Screentime Group'][0]}' : '{prime_video_screentimes_group_high_movies['Prime Video'].max()}'\n
      The Screentime Group with Lowest Movies Count on 'Prime Video' is '{prime_video_screentimes_group_low_movies['Screentime Group'][0]}' : '{prime_video_screentimes_group_low_movies['Prime Video'].min()}'\n
      
      The Screentime Group with Highest Movies Count on 'Disney+' is '{disney_screentimes_group_high_movies['Screentime Group'][0]}' : '{disney_screentimes_group_high_movies['Disney+'].max()}'\n
      The Screentime Group with Lowest Movies Count on 'Disney+' is '{disney_screentimes_group_low_movies['Screentime Group'][0]}' : '{disney_screentimes_group_low_movies['Disney+'].min()}'\n 
      ''')
      The Screentime Group with Highest Movies Count Ever Got is '2' : '9573'

      The Screentime Group with Lowest Movies Count Ever Got is '5' : '1'

      
      The Screentime Group with Highest Movies Count on 'Netflix' is '2' : '2332'

      The Screentime Group with Lowest Movies Count on 'Netflix' is '5' : '0'

      
      The Screentime Group with Highest Movies Count on 'Hulu' is '2' : '717'

      The Screentime Group with Lowest Movies Count on 'Hulu' is '5' : '0'

      
      The Screentime Group with Highest Movies Count on 'Prime Video' is '2' : '6681'

      The Screentime Group with Lowest Movies Count on 'Prime Video' is '5' : '1'

      
      The Screentime Group with Highest Movies Count on 'Disney+' is '2' : '303'

      The Screentime Group with Lowest Movies Count on 'Disney+' is '4' : '0'
 
      
In [103]:
fig, axes = plt.subplots(2, 2, figsize = (20 , 20))
 
n_scr_ax1 = sns.barplot(x = netflix_screentimes_group_movies['Screentime Group'][:10], y = netflix_screentimes_group_movies['Netflix'][:10], palette = 'Reds_r', ax = axes[0, 0])
h_scr_ax2 = sns.barplot(x = hulu_screentimes_group_movies['Screentime Group'][:10], y = hulu_screentimes_group_movies['Hulu'][:10], palette = 'Greens_r', ax = axes[0, 1])
p_scr_ax3 = sns.barplot(x = prime_video_screentimes_group_movies['Screentime Group'][:10], y = prime_video_screentimes_group_movies['Prime Video'][:10], palette = 'Blues_r', ax = axes[1, 0])
d_scr_ax4 = sns.barplot(x = disney_screentimes_group_movies['Screentime Group'][:10], y = disney_screentimes_group_movies['Disney+'][:10], palette = 'BuPu_r', ax = axes[1, 1])
 
labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']
 
n_scr_ax1.title.set_text(labels[0])
h_scr_ax2.title.set_text(labels[1])
p_scr_ax3.title.set_text(labels[2])
d_scr_ax4.title.set_text(labels[3])
 
plt.show()
In [104]:
plt.figure(figsize = (20, 5))
sns.lineplot(x = screentimes_group_data_movies['Screentime Group'], y = screentimes_group_data_movies['Netflix'], color = 'red')
sns.lineplot(x = screentimes_group_data_movies['Screentime Group'], y = screentimes_group_data_movies['Hulu'], color = 'lightgreen')
sns.lineplot(x = screentimes_group_data_movies['Screentime Group'], y = screentimes_group_data_movies['Prime Video'], color = 'lightblue')
sns.lineplot(x = screentimes_group_data_movies['Screentime Group'], y = screentimes_group_data_movies['Disney+'], color = 'darkblue')
plt.xlabel('Screentime Group', fontsize = 15)
plt.ylabel('Movies Count', fontsize = 15)
plt.show()
In [105]:
print(f'''
      Accross All Platforms Total Count of Screentime Group is '{screentimes_group_data_movies['Screentime Group'].unique().shape[0]}'\n
      Total Count of Screentime Group on 'Netflix' is '{netflix_screentimes_group_movies['Screentime Group'].unique().shape[0]}'\n
      Total Count of Screentime Group on 'Hulu' is '{hulu_screentimes_group_movies['Screentime Group'].unique().shape[0]}'\n
      Total Count of Screentime Group on 'Prime Video' is '{prime_video_screentimes_group_movies['Screentime Group'].unique().shape[0]}'\n
      Total Count of Screentime Group on 'Disney+' is '{disney_screentimes_group_movies['Screentime Group'].unique().shape[0]}'\n 
      ''')
      Accross All Platforms Total Count of Screentime Group is '8'

      Total Count of Screentime Group on 'Netflix' is '5'

      Total Count of Screentime Group on 'Hulu' is '5'

      Total Count of Screentime Group on 'Prime Video' is '8'

      Total Count of Screentime Group on 'Disney+' is '4'
 
      
In [106]:
fig, axes = plt.subplots(2, 2, figsize = (20 , 20))
 
n_scr_ax1 = sns.lineplot(y = screentimes_group_data_movies['Screentime Group'], x = screentimes_group_data_movies['Netflix'], color = 'red', ax = axes[0, 0])
h_scr_ax2 = sns.lineplot(y = screentimes_group_data_movies['Screentime Group'], x = screentimes_group_data_movies['Hulu'], color = 'lightgreen', ax = axes[0, 1])
p_scr_ax3 = sns.lineplot(y = screentimes_group_data_movies['Screentime Group'], x = screentimes_group_data_movies['Prime Video'], color = 'lightblue', ax = axes[1, 0])
d_scr_ax4 = sns.lineplot(y = screentimes_group_data_movies['Screentime Group'], x = screentimes_group_data_movies['Disney+'], color = 'darkblue', ax = axes[1, 1])
 
labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']
 
n_scr_ax1.title.set_text(labels[0])
h_scr_ax2.title.set_text(labels[1])
p_scr_ax3.title.set_text(labels[2])
d_scr_ax4.title.set_text(labels[3])

plt.show()
In [107]:
fig, axes = plt.subplots(2, 2, figsize = (20 , 20))
 
n_ru_ax1 = sns.barplot(x = screentimes_group_data_movies['Screentime Group'][:10], y = screentimes_group_data_movies['Netflix'][:10], palette = 'Reds_r', ax = axes[0, 0])
h_ru_ax2 = sns.barplot(x = screentimes_group_data_movies['Screentime Group'][:10], y = screentimes_group_data_movies['Hulu'][:10], palette = 'Greens_r', ax = axes[0, 1])
p_ru_ax3 = sns.barplot(x = screentimes_group_data_movies['Screentime Group'][:10], y = screentimes_group_data_movies['Prime Video'][:10], palette = 'Blues_r', ax = axes[1, 0])
d_ru_ax4 = sns.barplot(x = screentimes_group_data_movies['Screentime Group'][:10], y = screentimes_group_data_movies['Disney+'][:10], palette = 'BuPu_r', ax = axes[1, 1])
 
labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']
 
n_ru_ax1.title.set_text(labels[0])
h_ru_ax2.title.set_text(labels[1])
p_ru_ax3.title.set_text(labels[2])
d_ru_ax4.title.set_text(labels[3])
 
plt.show()